mindspore 2.6.0rc1-cp310-cp310-win_amd64.whl → 2.7.0rc1-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (407)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +1 -1
  5. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +40 -9
  9. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  10. mindspore/_extends/optimize/cell_utils.py +96 -0
  11. mindspore/_extends/parse/__init__.py +2 -2
  12. mindspore/_extends/parse/compile_config.py +44 -22
  13. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
  14. mindspore/_extends/parse/parser.py +37 -62
  15. mindspore/_extends/parse/resources.py +39 -0
  16. mindspore/_extends/parse/standard_method.py +43 -13
  17. mindspore/_extends/parse/trope.py +8 -1
  18. mindspore/_extends/pijit/__init__.py +1 -2
  19. mindspore/amp.py +4 -4
  20. mindspore/atlprov.dll +0 -0
  21. mindspore/avcodec-59.dll +0 -0
  22. mindspore/avdevice-59.dll +0 -0
  23. mindspore/avfilter-8.dll +0 -0
  24. mindspore/avformat-59.dll +0 -0
  25. mindspore/avutil-57.dll +0 -0
  26. mindspore/boost/adasum.py +1 -1
  27. mindspore/boost/boost_cell_wrapper.py +4 -4
  28. mindspore/c1.dll +0 -0
  29. mindspore/c1xx.dll +0 -0
  30. mindspore/c2.dll +0 -0
  31. mindspore/common/__init__.py +27 -2
  32. mindspore/common/_grad_function.py +2 -1
  33. mindspore/common/_pijit_context.py +28 -7
  34. mindspore/common/_stub_tensor.py +1 -209
  35. mindspore/common/_tensor_cpp_method.py +1 -1
  36. mindspore/common/_tensor_docs.py +77 -16
  37. mindspore/common/api.py +238 -113
  38. mindspore/common/dtype.py +21 -11
  39. mindspore/common/dump.py +10 -15
  40. mindspore/common/generator.py +5 -3
  41. mindspore/common/hook_handle.py +11 -2
  42. mindspore/common/jit_config.py +1 -1
  43. mindspore/common/jit_trace.py +84 -105
  44. mindspore/common/parameter.py +26 -12
  45. mindspore/common/recompute.py +3 -3
  46. mindspore/common/sparse_tensor.py +0 -3
  47. mindspore/common/symbol.py +0 -1
  48. mindspore/common/tensor.py +81 -81
  49. mindspore/communication/_comm_helper.py +46 -4
  50. mindspore/communication/management.py +79 -7
  51. mindspore/context.py +58 -40
  52. mindspore/dataset/core/config.py +3 -3
  53. mindspore/dataset/engine/datasets.py +20 -7
  54. mindspore/dataset/engine/datasets_user_defined.py +33 -3
  55. mindspore/dataset/engine/iterators.py +2 -2
  56. mindspore/dataset/engine/obs/config_loader.py +2 -2
  57. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  58. mindspore/dataset/transforms/py_transforms.py +7 -3
  59. mindspore/dataset/transforms/transforms.py +7 -3
  60. mindspore/dataset/vision/validators.py +1 -0
  61. mindspore/device_context/ascend/device.py +1 -1
  62. mindspore/device_context/gpu/__init__.py +2 -2
  63. mindspore/device_context/gpu/device.py +1 -1
  64. mindspore/device_context/gpu/op_precision.py +4 -2
  65. mindspore/device_context/gpu/op_tuning.py +6 -3
  66. mindspore/device_manager.py +16 -9
  67. mindspore/dnnl.dll +0 -0
  68. mindspore/dpcmi.dll +0 -0
  69. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
  70. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  71. mindspore/experimental/optim/adadelta.py +13 -20
  72. mindspore/experimental/optim/adagrad.py +15 -22
  73. mindspore/experimental/optim/adam.py +17 -24
  74. mindspore/experimental/optim/adamax.py +14 -22
  75. mindspore/experimental/optim/adamw.py +28 -34
  76. mindspore/experimental/optim/asgd.py +15 -25
  77. mindspore/experimental/optim/lr_scheduler.py +27 -45
  78. mindspore/experimental/optim/nadam.py +14 -24
  79. mindspore/experimental/optim/optimizer.py +13 -23
  80. mindspore/experimental/optim/radam.py +18 -24
  81. mindspore/experimental/optim/rmsprop.py +14 -25
  82. mindspore/experimental/optim/rprop.py +15 -26
  83. mindspore/experimental/optim/sgd.py +9 -19
  84. mindspore/hal/__init__.py +4 -4
  85. mindspore/hal/contiguous_tensors_handle.py +2 -2
  86. mindspore/hal/memory.py +27 -7
  87. mindspore/include/api/cell.h +37 -1
  88. mindspore/include/api/delegate.h +10 -0
  89. mindspore/include/api/model.h +3 -0
  90. mindspore/include/api/types.h +2 -2
  91. mindspore/include/c_api/model_c.h +0 -58
  92. mindspore/include/c_api/tensor_c.h +0 -26
  93. mindspore/include/dataset/vision_ascend.h +1 -1
  94. mindspore/jpeg62.dll +0 -0
  95. mindspore/mindrecord/tools/cifar10.py +60 -11
  96. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  97. mindspore/mindspore_backend_common.dll +0 -0
  98. mindspore/mindspore_backend_manager.dll +0 -0
  99. mindspore/mindspore_common.dll +0 -0
  100. mindspore/mindspore_core.dll +0 -0
  101. mindspore/mindspore_cpu_res_manager.dll +0 -0
  102. mindspore/mindspore_dump.dll +0 -0
  103. mindspore/mindspore_frontend.dll +0 -0
  104. mindspore/mindspore_glog.dll +0 -0
  105. mindspore/mindspore_memory_pool.dll +0 -0
  106. mindspore/mindspore_ms_backend.dll +0 -0
  107. mindspore/mindspore_ops.dll +0 -0
  108. mindspore/mindspore_ops_host.dll +0 -0
  109. mindspore/mindspore_ops_kernel_common.dll +0 -0
  110. mindspore/mindspore_profiler.dll +0 -0
  111. mindspore/mindspore_pyboost.dll +0 -0
  112. mindspore/mindspore_pynative.dll +0 -0
  113. mindspore/mindspore_res_manager.dll +0 -0
  114. mindspore/mindspore_runtime_pipeline.dll +0 -0
  115. mindspore/mint/__init__.py +6 -46
  116. mindspore/mint/distributed/__init__.py +1 -0
  117. mindspore/mint/distributed/distributed.py +212 -9
  118. mindspore/mint/nn/__init__.py +1 -1
  119. mindspore/mint/nn/functional.py +53 -6
  120. mindspore/mint/nn/layer/_functions.py +164 -294
  121. mindspore/mint/nn/layer/activation.py +8 -6
  122. mindspore/mint/nn/layer/conv.py +137 -101
  123. mindspore/mint/nn/layer/normalization.py +8 -22
  124. mindspore/mint/optim/adam.py +19 -18
  125. mindspore/mint/optim/adamw.py +14 -8
  126. mindspore/mint/optim/sgd.py +5 -5
  127. mindspore/msobj140.dll +0 -0
  128. mindspore/mspdb140.dll +0 -0
  129. mindspore/mspdbcore.dll +0 -0
  130. mindspore/mspdbst.dll +0 -0
  131. mindspore/mspft140.dll +0 -0
  132. mindspore/msvcdis140.dll +0 -0
  133. mindspore/msvcp140_1.dll +0 -0
  134. mindspore/msvcp140_2.dll +0 -0
  135. mindspore/msvcp140_atomic_wait.dll +0 -0
  136. mindspore/msvcp140_codecvt_ids.dll +0 -0
  137. mindspore/nn/cell.py +328 -502
  138. mindspore/nn/grad/cell_grad.py +11 -12
  139. mindspore/nn/layer/activation.py +32 -34
  140. mindspore/nn/layer/basic.py +67 -64
  141. mindspore/nn/layer/channel_shuffle.py +4 -4
  142. mindspore/nn/layer/combined.py +4 -2
  143. mindspore/nn/layer/conv.py +117 -110
  144. mindspore/nn/layer/dense.py +9 -7
  145. mindspore/nn/layer/embedding.py +50 -52
  146. mindspore/nn/layer/image.py +37 -39
  147. mindspore/nn/layer/math.py +111 -112
  148. mindspore/nn/layer/normalization.py +56 -44
  149. mindspore/nn/layer/pooling.py +58 -63
  150. mindspore/nn/layer/rnn_cells.py +33 -33
  151. mindspore/nn/layer/rnns.py +56 -56
  152. mindspore/nn/layer/thor_layer.py +74 -73
  153. mindspore/nn/layer/transformer.py +11 -1
  154. mindspore/nn/learning_rate_schedule.py +20 -20
  155. mindspore/nn/loss/loss.py +79 -81
  156. mindspore/nn/optim/adam.py +3 -3
  157. mindspore/nn/optim/adasum.py +2 -2
  158. mindspore/nn/optim/asgd.py +2 -0
  159. mindspore/nn/optim/optimizer.py +1 -1
  160. mindspore/nn/optim/thor.py +2 -2
  161. mindspore/nn/probability/distribution/exponential.py +2 -1
  162. mindspore/nn/probability/distribution/poisson.py +2 -1
  163. mindspore/nn/sparse/sparse.py +3 -3
  164. mindspore/nn/wrap/cell_wrapper.py +34 -37
  165. mindspore/nn/wrap/grad_reducer.py +37 -37
  166. mindspore/nn/wrap/loss_scale.py +72 -74
  167. mindspore/numpy/array_creations.py +5 -5
  168. mindspore/numpy/fft.py +1 -1
  169. mindspore/numpy/math_ops.py +5 -5
  170. mindspore/opencv_core452.dll +0 -0
  171. mindspore/opencv_imgcodecs452.dll +0 -0
  172. mindspore/opencv_imgproc452.dll +0 -0
  173. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  174. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  175. mindspore/ops/_vmap/vmap_array_ops.py +31 -13
  176. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  177. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
  178. mindspore/ops/auto_generate/gen_extend_func.py +23 -141
  179. mindspore/ops/auto_generate/gen_ops_def.py +727 -321
  180. mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
  181. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  182. mindspore/ops/composite/__init__.py +10 -0
  183. mindspore/ops/composite/base.py +8 -4
  184. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  185. mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
  186. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  187. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  188. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  189. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  190. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  191. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  192. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  193. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  194. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  195. mindspore/ops/function/__init__.py +3 -1
  196. mindspore/ops/function/_add_attr_func.py +11 -6
  197. mindspore/ops/function/array_func.py +9 -96
  198. mindspore/ops/function/debug_func.py +4 -3
  199. mindspore/ops/function/grad/grad_func.py +1 -1
  200. mindspore/ops/function/math_func.py +33 -540
  201. mindspore/ops/function/nn_func.py +28 -74
  202. mindspore/ops/function/other_func.py +4 -1
  203. mindspore/ops/function/random_func.py +44 -5
  204. mindspore/ops/function/vmap_func.py +2 -1
  205. mindspore/ops/functional.py +2 -3
  206. mindspore/ops/functional_overload.py +571 -6
  207. mindspore/ops/op_info_register.py +21 -0
  208. mindspore/ops/operations/__init__.py +16 -11
  209. mindspore/ops/operations/_custom_ops_utils.py +689 -34
  210. mindspore/ops/operations/_inner_ops.py +3 -6
  211. mindspore/ops/operations/_sequence_ops.py +1 -1
  212. mindspore/ops/operations/array_ops.py +2 -2
  213. mindspore/ops/operations/comm_ops.py +185 -26
  214. mindspore/ops/operations/custom_ops.py +294 -174
  215. mindspore/ops/operations/debug_ops.py +59 -4
  216. mindspore/ops/operations/image_ops.py +13 -13
  217. mindspore/ops/operations/manually_defined/ops_def.py +15 -16
  218. mindspore/ops/operations/math_ops.py +3 -4
  219. mindspore/ops/operations/nn_ops.py +7 -39
  220. mindspore/ops/primitive.py +6 -10
  221. mindspore/ops/tensor_method.py +47 -8
  222. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  223. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  224. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  225. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  226. mindspore/ops_generate/common/base_generator.py +14 -0
  227. mindspore/ops_generate/common/gen_constants.py +8 -3
  228. mindspore/ops_generate/common/gen_utils.py +0 -19
  229. mindspore/ops_generate/common/op_proto.py +11 -4
  230. mindspore/ops_generate/common/template.py +88 -11
  231. mindspore/ops_generate/gen_ops.py +1 -1
  232. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  233. mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
  234. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  235. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  236. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  237. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  238. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  239. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
  240. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  241. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  242. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  243. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  244. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  245. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  246. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  247. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  248. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  249. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  250. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  251. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  252. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  253. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  254. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  255. mindspore/parallel/_auto_parallel_context.py +11 -8
  256. mindspore/parallel/_cell_wrapper.py +113 -45
  257. mindspore/parallel/_parallel_serialization.py +1 -1
  258. mindspore/parallel/_ps_context.py +4 -6
  259. mindspore/parallel/_tensor.py +167 -12
  260. mindspore/parallel/_transformer/moe.py +1 -1
  261. mindspore/parallel/_transformer/transformer.py +13 -8
  262. mindspore/parallel/auto_parallel.py +14 -7
  263. mindspore/parallel/checkpoint_convert.py +3 -3
  264. mindspore/parallel/checkpoint_transform.py +11 -7
  265. mindspore/parallel/cluster/process_entity/_api.py +84 -48
  266. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  267. mindspore/parallel/cluster/run.py +43 -4
  268. mindspore/parallel/function/__init__.py +8 -1
  269. mindspore/parallel/function/reshard_func.py +6 -7
  270. mindspore/parallel/nn/__init__.py +15 -2
  271. mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
  272. mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
  273. mindspore/parallel/shard.py +3 -4
  274. mindspore/parallel/transform_safetensors.py +463 -174
  275. mindspore/pgodb140.dll +0 -0
  276. mindspore/pgort140.dll +0 -0
  277. mindspore/profiler/__init__.py +2 -1
  278. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  279. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  280. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
  281. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  282. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  283. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  284. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  285. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  286. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  287. mindspore/profiler/analysis/task_manager.py +1 -1
  288. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  289. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  290. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
  291. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  292. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  293. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  294. mindspore/profiler/common/constant.py +16 -0
  295. mindspore/profiler/common/profiler_context.py +25 -27
  296. mindspore/profiler/common/profiler_info.py +0 -16
  297. mindspore/profiler/common/profiler_op_analyse.py +235 -0
  298. mindspore/profiler/common/profiler_output_path.py +23 -8
  299. mindspore/profiler/common/profiler_parameters.py +128 -35
  300. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  301. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  302. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  303. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  304. mindspore/profiler/dynamic_profiler.py +305 -314
  305. mindspore/profiler/envprofiler.py +12 -7
  306. mindspore/profiler/experimental_config.py +96 -6
  307. mindspore/profiler/mstx.py +33 -12
  308. mindspore/profiler/platform/__init__.py +2 -3
  309. mindspore/profiler/platform/npu_profiler.py +29 -19
  310. mindspore/profiler/profiler.py +35 -19
  311. mindspore/profiler/profiler_action_controller.py +64 -76
  312. mindspore/profiler/schedule.py +10 -4
  313. mindspore/rewrite/common/config.py +1 -0
  314. mindspore/rewrite/common/namer.py +1 -0
  315. mindspore/rewrite/common/namespace.py +1 -0
  316. mindspore/rewrite/node/node.py +31 -11
  317. mindspore/rewrite/parsers/assign_parser.py +1 -1
  318. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  319. mindspore/run_check/_check_version.py +7 -10
  320. mindspore/runtime/__init__.py +5 -5
  321. mindspore/runtime/event.py +10 -4
  322. mindspore/runtime/executor.py +60 -45
  323. mindspore/runtime/memory.py +30 -32
  324. mindspore/runtime/thread_bind_core.py +298 -164
  325. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  326. mindspore/swresample-4.dll +0 -0
  327. mindspore/swscale-6.dll +0 -0
  328. mindspore/tbbmalloc.dll +0 -0
  329. mindspore/tinyxml2.dll +0 -0
  330. mindspore/train/_utils.py +14 -4
  331. mindspore/train/amp.py +43 -20
  332. mindspore/train/callback/__init__.py +5 -5
  333. mindspore/train/callback/_checkpoint.py +3 -6
  334. mindspore/train/callback/_flops_collector.py +1 -1
  335. mindspore/train/callback/_landscape.py +0 -1
  336. mindspore/train/callback/_train_fault_tolerance.py +97 -16
  337. mindspore/train/data_sink.py +11 -2
  338. mindspore/train/dataset_helper.py +9 -0
  339. mindspore/train/model.py +135 -55
  340. mindspore/train/serialization.py +133 -111
  341. mindspore/train/summary/summary_record.py +13 -2
  342. mindspore/turbojpeg.dll +0 -0
  343. mindspore/utils/__init__.py +3 -2
  344. mindspore/utils/dryrun.py +0 -6
  345. mindspore/utils/runtime_execution_order_check.py +163 -77
  346. mindspore/utils/sdc_detect.py +68 -0
  347. mindspore/utils/utils.py +6 -9
  348. mindspore/vcmeta.dll +0 -0
  349. mindspore/vcruntime140.dll +0 -0
  350. mindspore/vcruntime140_1.dll +0 -0
  351. mindspore/version.py +1 -1
  352. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
  353. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +356 -394
  354. mindspore/_deprecated/jit.py +0 -198
  355. mindspore/experimental/es/__init__.py +0 -22
  356. mindspore/experimental/es/embedding_service.py +0 -891
  357. mindspore/experimental/es/embedding_service_layer.py +0 -581
  358. mindspore/profiler/parser/__init__.py +0 -14
  359. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  360. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  361. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  362. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  363. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  364. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  365. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  366. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  367. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  368. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  369. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  370. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  371. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  372. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  373. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  374. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  375. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  376. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  377. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  378. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  379. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  380. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  381. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  382. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  383. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  384. mindspore/profiler/parser/container.py +0 -229
  385. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  386. mindspore/profiler/parser/flops_parser.py +0 -531
  387. mindspore/profiler/parser/framework_enum.py +0 -111
  388. mindspore/profiler/parser/framework_parser.py +0 -464
  389. mindspore/profiler/parser/framework_struct.py +0 -61
  390. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  391. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  392. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  393. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  394. mindspore/profiler/parser/hccl_parser.py +0 -573
  395. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  396. mindspore/profiler/parser/integrator.py +0 -526
  397. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  398. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  399. mindspore/profiler/parser/minddata_parser.py +0 -186
  400. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  401. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  402. mindspore/profiler/parser/optime_parser.py +0 -250
  403. mindspore/profiler/parser/profiler_info.py +0 -213
  404. mindspore/profiler/parser/step_trace_parser.py +0 -666
  405. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
  406. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  407. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
@@ -201,9 +201,9 @@ def adaptive_avg_pool1d(input, output_size):
 
  Examples:
  >>> import mindspore
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> input = Tensor([[2,3],[3,4]],dtype=mindspore.float16)
- >>> output = mint.nn.functional.adaptive_avg_pool1d(input, 3)
+ >>> output = ops.auto_generate.adaptive_avg_pool1d(input, 3)
  >>> print(output)
  [[2. 2.5 3. ]
  [3. 3.5 4. ]]
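The hunk above switches the docstring example from the `mint` front end to `ops.auto_generate`. A minimal runnable sketch of the updated usage, assuming `adaptive_avg_pool1d` is exported from `mindspore.ops.auto_generate` exactly as the new docstring shows:

    import mindspore
    from mindspore import Tensor, ops

    # Average-pool each row of a (2, 2) input into 3 output bins, as in the docstring.
    x = Tensor([[2, 3], [3, 4]], dtype=mindspore.float16)
    out = ops.auto_generate.adaptive_avg_pool1d(x, 3)
    print(out)  # expected per the docstring: [[2. 2.5 3.], [3. 3.5 4.]]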
@@ -442,8 +442,7 @@ def apply_rotary_pos_emb_(query, key, cos, sin, position_ids, cos_format=0):
  r"""
 
  """
- apply_rotary_pos_emb_op = _get_cache_prim(ApplyRotaryPosEmb)(cos_format)
- return apply_rotary_pos_emb_op(query, key, cos, sin, position_ids)
+ return apply_rotary_pos_emb_impl(query, key, cos, sin, position_ids, cos_format)
 
 
  def argmax_ext(input, dim=None, keepdim=False):
@@ -527,9 +526,9 @@ def argmin_ext(input, dim=None, keepdim=False):
  Examples:
  >>> import numpy as np
  >>> from mindspore import Tensor
- >>> from mindspore import mint
+ >>> from mindspore import ops
  >>> x = Tensor(np.array([[1, 20, 5], [67, 8, 9], [130, 24, 15]]).astype(np.float32))
- >>> output = mint.argmin(x, dim=-1)
+ >>> output = ops.auto_generate.argmin_ext(x, dim=-1)
  >>> print(output)
  [0 1 2]
  """
@@ -566,14 +565,13 @@ def argsort_ext(input, dim=-1, descending=False, stable=False):
  Examples:
  >>> import mindspore
  >>> import numpy as np
- >>> from mindspore import Tensor
- >>> import mindspore.mint as mint
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.array([[8, 2, 1], [5, 9, 3], [4, 6, 7]]), mindspore.float16)
- >>> sort = mint.argsort(x)
+ >>> sort = ops.auto_generate.argsort_ext(x)
  >>> print(sort)
  [[2 1 0]
- [2 0 1]
- [0 1 2]]
+ [2 0 1]
+ [0 1 2]]
  """
  return argsort_op(input, dim, descending, stable)
 
@@ -819,7 +817,7 @@ def atan2_ext(input, other):
  >>> from mindspore import Tensor, ops
  >>> input = Tensor(np.array([0, 1]), mindspore.float32)
  >>> other = Tensor(np.array([1, 1]), mindspore.float32)
- >>> output = mint.atan2(input, other)
+ >>> output = ops.auto_generate.atan2_ext(input, other)
  >>> print(output)
  [0. 0.7853982]
  """
@@ -979,9 +977,9 @@ def avg_pool1d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False,
  Examples:
  >>> import mindspore
  >>> import numpy as np
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> input_x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
- >>> output = mint.nn.functional.avg_pool1d(input_x, kernel_size=6, stride=1)
+ >>> output = ops.auto_generate.avg_pool1d_ext(input_x, kernel_size=6, stride=1)
  >>> print(output.shape)
  (1, 3, 1)
  """
@@ -1086,14 +1084,14 @@ def bincount_ext(input, weights=None, minlength=0):
  ``Ascend``
 
  Examples:
- >>> from mindspore import mint, Tensor
- >>> print(mint.bincount(Tensor(np.arange(5))))
+ >>> from mindspore import ops, Tensor
+ >>> print(ops.auto_generate.bincount_ext(Tensor(np.arange(5))))
  [1 1 1 1 1]
- >>> print(mint.bincount(Tensor(np.array([0, 1, 1, 3, 2, 1, 7]))))
+ >>> print(ops.auto_generate.bincount_ext(Tensor(np.array([0, 1, 1, 3, 2, 1, 7]))))
  [1 3 1 1 0 0 0 1]
  >>> w = Tensor(np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6])) # weights
  >>> x = Tensor(np.array([0, 1, 1, 2, 2, 2]))
- >>> print(mint.bincount(x, weights=w, minlength=5))
+ >>> print(ops.auto_generate.bincount_ext(x, weights=w, minlength=5))
  [0.3 0.7 1.1 0. 0. ]
  """
  return bincount_ext_op(input, weights, minlength)
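The expected outputs in the updated `bincount_ext` example can be cross-checked against NumPy's `bincount`, whose behavior the docstring mirrors; this is only an illustrative check, not part of the package:

    import numpy as np

    # Plain counts, counts with a gap at index 7, and weighted counts with minlength.
    print(np.bincount(np.arange(5)))                      # [1 1 1 1 1]
    print(np.bincount(np.array([0, 1, 1, 3, 2, 1, 7])))   # [1 3 1 1 0 0 0 1]
    w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6])
    x = np.array([0, 1, 1, 2, 2, 2])
    print(np.bincount(x, weights=w, minlength=5))         # [0.3 0.7 1.1 0.  0. ]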
@@ -1184,7 +1182,7 @@ def broadcast_to(input, shape):
 
  Args:
  input (Tensor): The input tensor.
- shape (tuple): The target shape.
+ shape (tuple[int]): The target shape.
 
  Returns:
  Tensor
@@ -1209,6 +1207,84 @@ def broadcast_to(input, shape):
  """
  return broadcast_to_impl(input, shape)
 
+
+ def broadcast_to_view(input, shape):
+ r"""
+ Broadcasts input tensor to a given shape. The dim of input shape must be smaller
+ than or equal to that of target shape. Suppose input shape is :math:`(x_1, x_2, ..., x_m)`,
+ target shape is :math:`(*, y_1, y_2, ..., y_m)`, where :math:`*` means any additional dimension.
+ The broadcast rules are as follows:
+
+ Compare the value of :math:`x_m` and :math:`y_m`, :math:`x_{m-1}` and :math:`y_{m-1}`, ...,
+ :math:`x_1` and :math:`y_1` consecutively and
+ decide whether these shapes are broadcastable and what the broadcast result is.
+
+ If the value pairs at a specific dim are equal, then that value goes right into that dim of output shape.
+ With an input shape :math:`(2, 3)`, target shape :math:`(2, 3)` , the inferred output shape is :math:`(2, 3)`.
+
+ If the value pairs are unequal, there are three cases:
+
+ Case 1: If the value of the target shape in the dimension is -1, the value of the
+ output shape in the dimension is the value of the corresponding input shape in the dimension.
+ With an input shape :math:`(3, 3)`, target
+ shape :math:`(-1, 3)`, the output shape is :math:`(3, 3)`.
+
+ Case 2: If the value of target shape in the dimension is not -1, but the corresponding
+ value in the input shape is 1, then the corresponding value of the output shape
+ is that of the target shape. With an input shape :math:`(1, 3)`, target
+ shape :math:`(8, 3)`, the output shape is :math:`(8, 3)`.
+
+ Case 3: If the corresponding values of the two shapes do not satisfy the above cases,
+ it means that broadcasting from the input shape to the target shape is not supported.
+
+ So far we got the last m dims of the outshape, now focus on the first :math:`*` dims, there are
+ two cases:
+
+ If the first :math:`*` dims of output shape does not have -1 in it, then fill the input
+ shape with ones until their length are the same, and then refer to
+ Case 2 mentioned above to calculate the output shape. With target shape :math:`(3, 1, 4, 1, 5, 9)`,
+ input shape :math:`(1, 5, 9)`, the filled input shape will be :math:`(1, 1, 1, 1, 5, 9)` and thus the
+ output shape is :math:`(3, 1, 4, 1, 5, 9)`.
+
+ If the first :math:`*` dims of output shape have -1 in it, it implies this -1 is corresponding to
+ a non-existing dim so they're not broadcastable. With target shape :math:`(3, -1, 4, 1, 5, 9)`,
+ input shape :math:`(1, 5, 9)`, instead of operating the dim-filling process first, it raises errors directly.
+
+ Args:
+ input (Tensor): The input Tensor.
+ shape (tuple): The target shape to broadcast. Can be fully specified, or have -1 in one position
+ where it will be substituted by the input tensor's shape in that position, see example.
+
+ Returns:
+ Tensor, with the given `shape` and the same data type as `input`.
+
+ Raises:
+ TypeError: If `shape` is not a tuple.
+ ValueError: If the target and input shapes are incompatible, or if a - 1 in the target shape is in an invalid
+ location.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import numpy as np
+ >>> from mindspore import Tensor
+ >>> from mindspore.ops.auto_generate import BroadcastToView
+ >>> shape = (2, 3)
+ >>> x = Tensor(np.array([1, 2, 3]).astype(np.float32))
+ >>> output = BroadcastToView()(x, shape)
+ >>> print(output)
+ [[1. 2. 3.]
+ [1. 2. 3.]]
+ >>> shape = (-1, 2)
+ >>> x = Tensor(np.array([[1], [2]]).astype(np.float32))
+ >>> output = BroadcastToView()(x, shape)
+ >>> print(output)
+ [[1. 1.]
+ [2. 2.]]
+ """
+ return broadcast_to_view_op(input, shape)
+
 
  cast_op=Cast()
 
  def cast(input, dtype):
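Apart from the -1 placeholder handling, the broadcast rules described in the new `broadcast_to_view` docstring match NumPy broadcasting, so the first example can be sanity-checked with `numpy.broadcast_to` (illustration only; the MindSpore view op itself targets Ascend):

    import numpy as np

    # Broadcasting a (3,) vector to (2, 3): the missing leading dim is filled with 1,
    # then the size-1 dims are expanded to the target sizes (Case 2 in the docstring).
    x = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    print(np.broadcast_to(x, (2, 3)))
    # [[1. 2. 3.]
    #  [1. 2. 3.]]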
@@ -1750,7 +1826,7 @@ def correlate(a, v, pad_mode='valid'):
 
  Note:
  - `correlate` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `correlate` is not supported on Windows platform yet.
 
  Args:
@@ -1960,7 +2036,7 @@ def cummin_ext(input, dim):
  \end{array}
 
  .. note::
- O2 mode is not supported in Ascend.
+ GE backend is not supported in Ascend.
 
  Args:
  input (Tensor): The input Tensor, The dimension must be greater than 0.
@@ -2040,61 +2116,6 @@ def cumsum_ext(input, dim, dtype=None):
  return cumsum_ext_op(input, dim, dtype)
 
 
- def decoder_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len):
- r"""
- The DecoderKVCache is used for decoding the KVCache of transformer network.
-
- Args:
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
- When format is BHSD, cache tensor of shape
- :math:`(batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
- When format is BSD, cache tensor of shape
- :math:`(batch\_size, max\_seq\_length, hidden\_size)`.
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
- When format is BHSD, update tensor of shape
- :math:`(batch\_size, num\_head, update\_seq\_length, size\_pre\_head)`.
- When format is BSD, update tensor of shape
- :math:`(batch\_size, update\_seq\_length, hidden\_size)`.
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
- Valid_seq_len tensor of shape :math:`(batch\_size)`.
- batch_index (Tensor): The batch_index tensor with data type of int64.
- Batch_index tensor of shape :math:`(batch\_size)`. Indicate that which batch of cache tensor is going to be update. Not abel for now.
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
- New_max_seq_len tensor of shape :math:`(1)`.
- Indicate that user want to change the shape of cache tensor from
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`. to
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`.
- to update the cache tensor. This will not real change the shape of `cache` tensor.
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
-
- Outputs:
- With same data type and same shape as `cache` tensor.
-
- Supported Platforms:
- ``Ascend``
-
- Examples:
- >>> from mindspore.ops.operations import _inner_ops
- >>> b = 4
- >>> h = 40
- >>> max_s = 1024
- >>> s = 1
- >>> d = 128
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=b).astype(np.int64))
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=b, replace=False).astype(np.int64))
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
- >>> decoder_kv_cache = _inner_ops.DecoderKVCache()
- >>> output = decoder_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
- >>> print(cache)
- """
- return decoder_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
-
-
  def dense(input, weight, bias=None):
  r"""
  Applies the dense connected operation to the `input`. The dense function is defined as:
@@ -2233,9 +2254,9 @@ def diag_ext(input, diagonal=0):
  ``Ascend``
 
  Examples:
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> input = Tensor([1, 2, 3, 4]).astype('int32')
- >>> output = mint.diag(input)
+ >>> output = ops.auto_generate.diag_ext(input)
  >>> print(output)
  [[1 0 0 0]
  [0 2 0 0]
@@ -2331,10 +2352,10 @@ def dot(input, other):
 
  Examples:
  >>> import mindspore
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor([2.0, 3.0], mindspore.float32)
  >>> y = Tensor([2.0, 1.0], mindspore.float32)
- >>> output = mint.dot(x, y)
+ >>> output = ops.auto_generate.dot(x, y)
  >>> print(output)
  7.0
  >>> print(output.dtype)
@@ -2781,6 +2802,46 @@ def expand_dims(input_x, axis):
  return expand_dims_op(input_x, axis)
 
 
+ def expand_dims_view(input, dim):
+ r"""
+ Adds an additional dimension to `input_x` at the given axis, the dimension
+ of `input_x` should be greater than or equal to 1.
+
+ Note:
+ If the specified axis is a negative number, the index is counted
+ backward from the end and starts at 1.
+
+ Args:
+ input_x (Tensor): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
+ axis (int): Specifies the dimension index at which to expand
+ the shape of `input_x`. The value of axis must be in the range
+ `[-input_x.ndim-1, input_x.ndim]`. Only constant value is allowed.
+
+ Returns:
+ Tensor, the shape of tensor is :math:`(1, x_1, x_2, ..., x_R)` if the
+ value of `axis` is 0. It has the same data type as `input_x`.
+
+ Raises:
+ TypeError: If `axis` is not an int.
+ ValueError: If `axis` is not in the valid range :math:`[-a.ndim-1, a.ndim]`.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
+ >>> from mindspore.ops.auto_generate import ExpandDimsView
+ >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32)
+ >>> output = ExpandDimsView()(input_tensor, 0)
+ >>> print(output)
+ [[[2. 2.]
+ [2. 2.]]]
+ """
+ return expand_dims_view_op(input, dim)
+
+
  def expm1(input):
  r"""
  Compute exponential of the input tensor, then minus 1, element-wise.
@@ -2936,7 +2997,7 @@ def fft2(input, s=None, dim=(-2, -1), norm=None):
 
  Note:
  - `fft2` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `fft2` is not supported on Windows platform yet.
 
  Args:
@@ -3000,7 +3061,7 @@ def fftfreq(n, d=1.0, dtype=None):
 
  Note:
  - `fftfreq` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `fftfreq` is not supported on Windows platform yet.
 
  Args:
@@ -3033,7 +3094,7 @@ def fftn(input, s=None, dim=None, norm=None):
 
  Note:
  - `fftn` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `fftn` is not supported on Windows platform yet.
 
  Args:
@@ -3093,7 +3154,7 @@ def fftshift(input, dim=None):
 
  Note:
  - `fftshift` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `fftshift` is not supported on Windows platform yet.
 
  Args:
@@ -3129,7 +3190,7 @@ def fft(input, n=None, dim=-1, norm=None):
 
  Note:
  - `fft` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `fft` is not supported on Windows platform yet.
 
  Args:
@@ -3388,6 +3449,13 @@ def frac_ext(input):
  return frac_op(input)
 
 
+ def fused_add_topk_div(x, add_num, group_num, group_topk, n, k, activate_type=0, is_norm=True, scale=2.5, mapping_num=None, mapping_table=None, enable_expert_mapping=False):
+ r"""
+
+ """
+ return fused_add_topk_div_op(x, add_num, group_num, group_topk, n, k, activate_type, is_norm, scale, mapping_num, mapping_table, enable_expert_mapping)
+
+
  def gather_d(x, dim, index):
  r"""
  Gathers elements along an axis specified by dim.
@@ -3579,20 +3647,6 @@ def geqrf(input):
  return geqrf_op(input)
 
 
- def gmm_backward(grad, x, weight, group_list=None):
- r"""
-
- """
- return gmm_backward_op(grad, x, weight, group_list)
-
-
- def gmm_v2_backward(grad, x, weight, group_list=None, group_list_type=0):
- r"""
-
- """
- return gmm_v2_backward_op(grad, x, weight, group_list, group_list_type)
-
-
  def greater_equal(input, other):
  r"""
  Compute the value of :math:`input >= other` element-wise.
@@ -3675,7 +3729,7 @@ def hfft2(input, s=None, dim=(-2, -1), norm=None):
 
  Note:
  - `hfft2` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `hfft2` is not supported on Windows platform yet.
 
  Args:
@@ -3736,7 +3790,7 @@ def hfftn(input, s=None, dim=None, norm=None):
 
  Note:
  - `hfftn` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `hfftn` is not supported on Windows platform yet.
 
  Args:
@@ -3797,7 +3851,7 @@ def hfft(input, n=None, dim=-1, norm=None):
 
  Note:
  - `hfft` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `hfft` is not supported on Windows platform yet.
 
  Args:
@@ -4058,7 +4112,7 @@ def ifft2(input, s=None, dim=(-2, -1), norm=None):
 
  Note:
  - `ifft2` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `ifft2` is not supported on Windows platform yet.
 
  Args:
@@ -4118,7 +4172,7 @@ def ifftn(input, s=None, dim=None, norm=None):
 
  Note:
  - `ifftn` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `ifftn` is not supported on Windows platform yet.
 
  Args:
@@ -4178,7 +4232,7 @@ def ifftshift(input, dim=None):
 
  Note:
  - `ifftshift` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `ifftshift` is not supported on Windows platform yet.
 
  Args:
@@ -4214,7 +4268,7 @@ def ifft(input, n=None, dim=-1, norm=None):
 
  Note:
  - `ifft` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `ifft` is not supported on Windows platform yet.
 
  Args:
@@ -4270,7 +4324,7 @@ def ihfft2(input, s=None, dim=(-2, -1), norm=None):
 
  Note:
  - `ihfft2` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `ihfft2` is not supported on Windows platform yet.
 
  Args:
@@ -4331,7 +4385,7 @@ def ihfftn(input, s=None, dim=None, norm=None):
 
  Note:
  - `ihfftn` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `ihfftn` is not supported on Windows platform yet.
 
  Args:
@@ -4392,7 +4446,7 @@ def ihfft(input, n=None, dim=-1, norm=None):
 
  Note:
  - `ihfft` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `ihfft` is not supported on Windows platform yet.
 
  Args:
@@ -4513,56 +4567,6 @@ def unfold_ext(input, kernel_size, dilation=1, padding=0, stride=1):
  return im2col_ext_op(input, kernel_size, dilation, padding, stride)
 
 
- def index_add_ext(input, dim, index, source, alpha=1):
- r"""
- Accumulate the elements of `alpha` times `source` into the `input` by adding to the index in the order given in `index`. For example, if ``dim == 0`` , ``index[i] == j`` , and ``alpha = -1`` , then the `i` th row of `source` is subtracted from the `j` th row of `input` . The `dim` th dimension of `source` must have the same size as the length of `index` , and all other dimensions must match `input`, or an error will be raised. For a 3-D tensor, the output is defined as follows:
-
- .. math::
- \begin{array}{ll}
- input[index[i],\ :,\ :]\ +=\ alpha * source[i,\ :,\ :] \qquad \#if\ dim == 0 \\
- input[:,\ \ index[i],\ :]\ +=\ alpha * source[:,\ \ i,\ :] \qquad \#if\ dim == 1 \\
- input[:,\ :,\ \ index[i]]\ +=\ alpha * source[:,\ :,\ \ i] \qquad\#if\ dim == 2 \\
- \end{array}
-
- .. warning::
- This is an experimental API that is subject to change or deletion.
-
- Args:
- input (Tensor): The input Tensor.
- dim (int): The dimension along which to index.
- index (Tensor): Add the value of "input Tensor" and `source` along the dimension of the `dim` according to the specified index value, with data type int32. The `index` must be 1D with the same size as the size of `source` in the `dim` dimension. The values of `index` should be in [0, b), where the b is the size of "input Tensor" in the `dim` dimension.
- source (Tensor): The input tensor with the value to add. Must have same data type as "input Tensor". The shape must be the same as "input Tensor" except the `dim` th dimension.
- alpha (number, optional): The scalar multiplier for source. Default: ``1``.
-
- Returns:
- Tensor, has the same shape and dtype as `input`.
-
- Raises:
- TypeError: If neither `index` nor `source` is a Tensor.
- ValueError: If the value of `dim` is out of the dimension range of `source` shape.
- ValueError: If `index` rank is not the same as `source` rank.
- ValueError: If shape of `index` is not 1D or size of `index` is not equal to dimension of source[dim].
- ValueError: If the shape of `source` is not the same as that of `input` except the `dim` axis.
-
- Supported Platforms:
- ``Ascend``
-
- Examples:
- >>> import numpy as np
- >>> import mindspore
- >>> from mindspore import Tensor, ops
- >>> x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
- >>> index = Tensor(np.array([0, 2]), mindspore.int32)
- >>> y = Tensor(np.array([[0.5, 1.0], [1.0, 1.5], [2.0, 2.5]]), mindspore.float32)
- >>> output = ops.auto_generate.index_add_ext(x, 1, index, y, alpha=1)
- >>> print(output)
- [[ 1.5 2. 4. ]
- [ 5. 5. 7.5]
- [ 9. 8. 11.5]]
- """
- return index_add_ext_op(input, dim, index, source, alpha)
-
-
  def index_fill_scalar(input, dim, index, value):
  r"""
 
@@ -4673,6 +4677,13 @@ def index_select_ext(input, dim, index):
  return index_select_op(input, dim, index)
 
 
+ def inner_moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
+ r"""
+
+ """
+ return inner_moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
+
+
  def inplace_adds_ext(input, other, alpha=1):
  r"""
 
@@ -4903,9 +4914,9 @@ def inplace_hardtanh(input, min_val=-1, max_val=1):
 
  Examples:
  >>> import mindspore
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor([-1, -2, 0, 2, 1], mindspore.float16)
- >>> mint.hardtanh_(x, min_val=-1.0, max_val=1.0)
+ >>> ops.auto_generate.inplace_hardtanh(x, min_val=-1.0, max_val=1.0)
  >>> print(x)
  [-1. -1. 0. 1. 1.]
  """
@@ -4980,6 +4991,51 @@ def masked_fill_tensor_(input, mask, value):
  return inplace_masked_fill_tensor_op(input, mask, value)
 
 
+ def matmul_add_(x, weight, C):
+ r"""
+ Fusion Operator of Transpose, Matmul, and InplaceAdd.
+
+ .. warning::
+ - This is an experimental API that is subject to change or deletion.
+ - This API is only supported in Atlas A2 training series for now.
+ - This API is only supported on GRAPH mode.
+
+ Args:
+ x (Tensor): Matrix A in matrix multiplication, with shape :math:`(k, m)` or :math:`(batch, k, m)`,
+ whose type should be float16 or bfloat16.
+ weight (Tensor): Matrix B in matrix multiplication, with shape :math:`(k, n)` or :math:`(batch, k, n)`,
+ whose type should be float16 or bfloat16.
+ C (Tensor): A Tensor acting as both input and output, with type of float32.
+ It's shape should be :math:`(m, n)` or :math:`(batch, m, n)`.
+
+ Returns:
+ Tensor, has the same shape and data type as `C`.
+
+ Raises:
+ TypeError: If the dtype of `weight` is not the same as `x`.
+ ValueError: If the ranks of `x` , `weight` and `C` are not the same.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops, nn, context
+ >>> context.set_context(mode=context.GRAPH_MODE, jit_config={"jit_level": "O0"})
+ >>> class Net(nn.Cell):
+ ... def construct(self, x, weight, C):
+ ... return ops.auto_generate.inplace_matmul_add_op(x, weight, C)
+ >>> x = Tensor(np.random.randn(10, 20), mindspore.float16)
+ >>> weight = Tensor(np.random.randn(10, 8), mindspore.float16)
+ >>> C = Tensor(np.random.randn(20, 8), mindspore.float32)
+ >>> output = Net()(x, weight, C)
+ >>> print(output.shape)
+ (20, 8)
+ """
+ return inplace_matmul_add_op(x, weight, C)
+
+
  def inplace_muls(input, other):
  r"""
 
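Reading the documented shapes of the new `matmul_add_` op (x is (k, m), weight is (k, n), C is (m, n)), the fused computation is presumably C += xᵀ @ weight; a NumPy shape check of the docstring's example dimensions under that assumption (not part of the package):

    import numpy as np

    # Assumption (inferred from the documented shapes): the fusion computes C += x.T @ weight.
    k, m, n = 10, 20, 8
    x = np.random.randn(k, m).astype(np.float16)
    weight = np.random.randn(k, n).astype(np.float16)
    C = np.random.randn(m, n).astype(np.float32)
    result = C + x.astype(np.float32).T @ weight.astype(np.float32)
    print(result.shape)  # (20, 8), matching the output shape in the docstring example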
@@ -5008,6 +5064,52 @@ def inplace_scatter_add(input, dim, index, src):
  return inplace_scatter_add_op(input, dim, index, src)
 
 
+ def inplace_silu(input):
+ r"""
+ Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
+
+ .. math::
+
+ \text{SiLU}(x) = x * \sigma(x),
+
+ where :math:`x` is an element of the input, :math:`\sigma(x)` is Sigmoid function.
+
+ .. math::
+
+ \text{sigma}(x_i) = \frac{1}{1 + \exp(-x_i)},
+
+ SiLU Function Graph:
+
+ .. image:: ../images/SiLU.png
+ :align: center
+
+ Args:
+ input (Tensor): `input` is :math:`x` in the preceding formula. Input with the data type
+ float16 or float32.
+ inplace (bool, optional): If it is ``True``, enable the in place update function.
+ Default value: ``False``.
+
+ Returns:
+ Tensor, with the same type and shape as the `input`.
+
+ Raises:
+ TypeError: If dtype of `input` is neither float16 nor float32.
+
+ Supported Platforms:
+ ``Ascend`` ``GPU`` ``CPU``
+
+ Examples:
+ >>> import mindspore
+ >>> from mindspore import Tensor, mint
+ >>> import numpy as np
+ >>> input = Tensor(np.array([-1, 2, -3, 2, -1]), mindspore.float16)
+ >>> output = mint.nn.functional.silu(input, inplace=True)
+ >>> print(output)
+ [-0.269 1.762 -0.1423 1.762 -0.269]
+ """
+ return inplace_silu_op(input)
+
+
  def inplace_stop_gradient(input):
  r"""
 
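The expected output in the new `inplace_silu` example follows directly from the SiLU formula in its docstring; a quick NumPy evaluation of x * sigmoid(x) on the same input reproduces it (illustration only):

    import numpy as np

    # SiLU(x) = x * sigmoid(x), evaluated on the docstring's input values.
    x = np.array([-1, 2, -3, 2, -1], dtype=np.float32)
    silu = x / (1.0 + np.exp(-x))
    print(np.round(silu, 4))  # [-0.2689  1.7616 -0.1423  1.7616 -0.2689]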
@@ -5049,9 +5151,6 @@ def inplace_threshold(input, threshold, value):
  \text{value}, &\text{ otherwise }
  \end{cases}
 
- .. warning::
- This is an experimental API that is subject to change or deletion.
-
  Args:
  input (Tensor): The input Tensor.
  threshold (Union[int, float]): The value of the threshold.
@@ -5092,7 +5191,7 @@ def irfft2(input, s=None, dim=(-2, -1), norm=None):
 
  Note:
  - `irfft2` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `irfft2` is not supported on Windows platform yet.
 
  Args:
@@ -5150,7 +5249,7 @@ def irfftn(input, s=None, dim=None, norm=None):
 
  Note:
  - `irfftn` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `irfftn` is not supported on Windows platform yet.
 
  Args:
@@ -5209,7 +5308,7 @@ def irfft(input, n=None, dim=-1, norm=None):
 
  Note:
  - `irfft` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `irfft` is not supported on Windows platform yet.
 
  Args:
@@ -5376,12 +5475,12 @@ def kthvalue(input, k, dim=-1, keepdim=False):
  Examples:
  >>> import mindspore
  >>> import numpy as np
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> input_x = Tensor(np.array([[1.01, 2.02, 3.03], [1.04, 2.05, 3.06]]), mindspore.float32)
- >>> out = mint.kthvalue(input_x, 2, 1, False)
+ >>> out = ops.auto_generate.kthvalue(input_x, 2, 1, False)
  >>> print(out)
  (Tensor(shape=[2], dtype=Float32, value= [ 2.01999998e+00, 2.04999995e+00]), Tensor(shape=[2], dtype=Int64, value= [1, 1]))
- >>> out1 = mint.kthvalue(input_x, 2, 1, True)
+ >>> out1 = ops.auto_generate.kthvalue(input_x, 2, 1, True)
  >>> print(out1)
  (Tensor(shape=[2, 1], dtype=Float32, value=
  [[ 2.01999998e+00],
@@ -5669,9 +5768,9 @@ def linalg_qr(A, mode='reduced'):
  Examples:
  >>> import mindspore
  >>> import numpy as np
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.array([[1.0, 1.0, 2.0, 4.0], [1.0, 1.0, 2.0, 4.0]]), mindspore.float32)
- >>> output = mint.linalg.qr(x)
+ >>> output = ops.auto_generate.linalg_qr(x)
  >>> print(output)
  (Tensor(shape=[2, 2], dtype=Float32, value=
  [[-7.07106829e-01, -7.07106769e-01],
@@ -5713,9 +5812,9 @@ def log10_ext(input):
  Examples:
  >>> import mindspore
  >>> import numpy as np
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.array([3.0, 5.0, 7.0]), mindspore.float32)
- >>> output = mint.log10(x)
+ >>> output = ops.auto_generate.log10_ext(x)
  >>> print(output)
  [0.47712136 0.69897 0.845098 ]
  """
@@ -5775,9 +5874,9 @@ def log2_ext(input):
  Examples:
  >>> import mindspore
  >>> import numpy as np
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.array([3.0, 5.0, 7.0]), mindspore.float32)
- >>> output = mint.log2(x)
+ >>> output = ops.auto_generate.log2_ext(x)
  >>> print(output)
  [1.5849625 2.321928 2.807355 ]
  """
@@ -5810,10 +5909,10 @@ def logaddexp2(input, other):
 
  Examples:
  >>> import numpy as np
- >>> from mindspore import Tensor, mint
+ >>> from mindspore import Tensor, ops
  >>> x1 = Tensor(np.array([1, 2, 3]).astype(np.float16))
  >>> x2 = Tensor(np.array(2).astype(np.float16))
- >>> output = mint.logaddexp2(x1, x2)
+ >>> output = ops.auto_generate.logaddexp2(x1, x2)
  >>> print(output)
  [2.586 3. 3.586]
  """
@@ -6495,6 +6594,254 @@ def mm_ext(input, mat2):
6495
6594
  return mm_ext_op(input, mat2)
6496
6595
 
6497
6596
 
6597
+ def moe_distribute_combine(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts=None, x_active_mask=None, activate_scale=None, weight_scale=None, group_list=None, expand_scales=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_expert_rank_num=0, global_bs=0, out_dtype=0, common_quant_mode=0, group_list_type=0):
6598
+ r"""
6599
+ Performs parallel communication for Mixture of Experts (MoE). When Tensor Parallelism (TP) communication exists,
6600
+ it first performs ReduceScatter communication, followed by Expert Parallelism (EP) AllToAllV communication.
6601
+ Otherwise, only EP AllToAllV communication is performed. Finally, the received data is multiplied by the
6602
+ corresponding weights and summed.
6603
+
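+ In other words (an informal reading of the sentence above rather than an exact specification of the kernel),
+ for each original token :math:`t` the combined output is the scale-weighted sum of its K expert copies:
+
+ .. math::
+     x_t = \sum_{k=1}^{K} \text{expert\_scales}_{t,k} \cdot \text{expand\_x}_{\pi(t,k)}
+
+ where :math:`\pi(t,k)` denotes the row of `expand_x` holding token :math:`t`'s copy for its k-th selected expert,
+ recovered from `expand_idx` and the send counts.
+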
6604
+ Notes:
6605
+ This function must be used in conjunction with `moe_distribute_dispatch`.
6606
+ - A: Maximum tokens to dispatch per rank:
6607
+ - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
6608
+ - For MoE experts:
6609
+ - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
6610
+ - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
6611
+ - H (hidden size): Dimension of each token's hidden state
6612
+ - Ascend 910B: 0 < H <= 7168, must be multiple of 32
6613
+ - Ascend 910_93: H = 7168
6614
+ - BS (batch sequence size): Number of tokens processed per rank
6615
+ - Ascend 910B: 0 < BS <= 256
6616
+ - Ascend 910_93: 0 < BS <= 512
6617
+ - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
6618
+ - server_num: Number of server nodes (supports 2, 4, 8)
6619
+ - local_expert_num: Number of experts per rank:
6620
+ - Shared expert ranks: local_expert_num = 1
6621
+ - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
6622
+ (TP communication is not supported when local_expert_num > 1)
6623
+
6624
+ Inputs:
6625
+ - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
6626
+ Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
6627
+ - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
6628
+ Format: ND, non-contiguous allowed.
6629
+ - **expand_idx** (Tensor) - Token counts per expert; it is the output of the dispatch operation.
6630
+ 1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
6631
+ - **ep_send_counts** (Tensor) - Tokens that each EP rank needs to send; it is the output of the dispatch operation.
6632
+ - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
6633
+ - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
6634
+ Format: ND, non-contiguous allowed.
6635
+ - **expert_scales** (Tensor) - Top-K expert weights per token.
6636
+ - **ep_world_size** (int) - EP domain size.
6637
+ - Ascend 910B: Supports 16, 32, 64.
6638
+ - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
6639
+ - **ep_rank_id** (int) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
6640
+ - **moe_expert_num** (int) - Number of MoE experts (0 < moe_expert_num <= 256),
6641
+ must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
6642
+ - **tp_send_counts** (Tensor) - Tokens that each TP rank needs to send (when TP exists). It is the output of the dispatch operation. Default: ``None``.
6643
+ - Ascend 910B: Not supported.
6644
+ - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
6645
+ - **x_active_mask** (Tensor) - Reserved parameter. Default: ``None``.
6646
+ - **activate_scale** (Tensor) - Reserved parameter. Default: ``None``.
6647
+ - **weight_scale** (Tensor) - Reserved parameter. Default: ``None``.
6648
+ - **group_list** (Tensor) - Reserved parameter. Default: ``None``.
6649
+ - **expand_scales** (Tensor) - Output of dispatch operation. Default: ``None``.
6650
+ - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6651
+ - Ascend 910_93: Unsupported.
6652
+ - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp. Default: ``None``.
6653
+ - **group_tp** (str) - TP communication domain name. Default: ``None``.
6654
+ - Ascend 910B: Unsupported (pass empty string).
6655
+ - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
6656
+ - **tp_world_size** (int) - TP domain size. Default: ``0``.
6657
+ - Ascend 910B: Unsupported (pass 0).
6658
+ - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
6659
+ - **tp_rank_id** (int) - Local rank ID in TP domain. Default: ``0``.
6660
+ - Ascend 910B: Unsupported (pass 0).
6661
+ - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
6662
+ - **expert_shard_type** (int) - Shared expert distribution type. Default: ``0``.
6663
+ - Ascend 910B: Unsupported (pass 0).
6664
+ - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
6665
+ - **shared_expert_num** (int) - Number of shared experts. Default: ``0``.
6666
+ - Ascend 910B: Unsupported (pass 0).
6667
+ - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
6668
+ - **shared_expert_rank_num** (int) - Number of ranks hosting shared experts. Default: ``0``.
6669
+ - Ascend 910B: Unsupported (pass 0).
6670
+ - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
6671
+ - **global_bs** (int) - Global batch size across EP domain. Default: ``0``.
6672
+ - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
6673
+ - Ascend 910_93: 0 or BS*ep_world_size.
6674
+ - **out_dtype** (int) - Specify the type of output x. Reserved parameter (pass 0 in current version). Default: ``0``.
6675
+ - **common_quant_mode** (int) - Communication quantization type. Reserved parameter (pass 0 in current version). Default: ``0``.
6676
+ - **group_list_type** (int) - The format of group_list. Reserved parameter (pass 0 in current version). Default: ``0``.
6677
+
6678
+ Outputs:
6679
+ - **x** (Tensor) - Processed tokens. 2D tensor [BS, H] with dtype matching input `expand_x`.
6680
+
6681
+ Raises:
6682
+ TypeError: If input dtypes don't match specifications.
6683
+ ValueError: If input values violate constraints (e.g., invalid expert indices).
6684
+ RuntimeError: If communication domain configuration is invalid.
6685
+
6686
+ Supported Platforms:
6687
+ ``Ascend``
6688
+
6689
+ Examples:
6690
+ >>> # EP-only communication example (Ascend 910B)
6691
+ >>> import mindspore as ms
6692
+ >>> from mindspore import Tensor
6693
+ >>> from mindspore import ops
6694
+ >>> from mindspore.communication import init, get_rank, GlobalComm
6695
+ >>> from mindspore.ops.auto_generate import moe_distribute_dispatch, moe_distribute_combine
6696
+ >>> import numpy as np
6697
+ >>> bs = 8
6698
+ >>> h = 7168
6699
+ >>> k = 8
6700
+ >>> ep_world_size = 16
6701
+ >>> moe_expert_num = 16
6702
+ >>> global_bs = bs * ep_world_size
6703
+ >>> x = Tensor(np.random.randn(bs, h), ms.float16)
6704
+ >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
6705
+ >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
6706
+ >>> init()
6707
+ >>> rank_id = get_rank()
6708
+ >>> expand_x, _, expand_idx, _, ep_recv_count, _, expand_scale = moe_distribute_dispatch(
6709
+ ... x, expert_ids, expert_scales, ep_world_size, rank_id, moe_expert_num,
6710
+ ... group_ep=GlobalComm.WORLD_COMM_GROUP)
6711
+ >>> out_x = moe_distribute_combine(
6712
+ ... expand_x, expert_ids, expand_idx, ep_recv_count, expert_scales, ep_world_size, rank_id,
6713
+ ... moe_expert_num, group_ep=GlobalComm.WORLD_COMM_GROUP)
6714
+ >>> print(out_x.shape)
6715
+ (8, 7168)
6716
+ """
6717
+ return moe_distribute_combine_op(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts, x_active_mask, activate_scale, weight_scale, group_list, expand_scales, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_expert_rank_num, global_bs, out_dtype, common_quant_mode, group_list_type)
6718
+
6719
+
6720
+ def moe_distribute_dispatch(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales=None, scales=None, x_active_mask=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_expert_rank_num=0, quant_mode=0, global_bs=0, expert_token_nums_type=0):
6721
+ r"""
6722
+ Performs token data quantization (optional) and parallel communication for Mixture of Experts (MoE).
6723
+ When Tensor Parallelism (TP) communication exists, it first performs Expert Parallelism (EP) AllToAllV
6724
+ communication followed by TP AllGatherV communication. Otherwise, only EP AllToAllV communication is performed.
6725
+
6726
+ Notes:
6727
+ - A: Maximum tokens to dispatch per rank:
6728
+ - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
6729
+ - For MoE experts:
6730
+ - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
6731
+ - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
6732
+ - H (hidden size): Dimension of each token's hidden state
6733
+ - Ascend 910B: 0 < H <= 7168, must be multiple of 32
6734
+ - Ascend 910_93: H = 7168
6735
+ - BS (batch sequence size): Number of tokens processed per rank
6736
+ - Ascend 910B: 0 < BS <= 256
6737
+ - Ascend 910_93: 0 < BS <= 512
6738
+ - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
6739
+ - server_num: Number of server nodes (supports 2, 4, 8)
6740
+ - local_expert_num: Number of experts per rank:
6741
+ - Shared expert ranks: local_expert_num = 1
6742
+ - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
6743
+ (TP communication is not supported when local_expert_num > 1)
6744
+
6745
+ Inputs:
6746
+ - **x** (Tensor) - Input token data to be sent. 2D tensor with shape [BS, H].
6747
+ Supported dtypes: float16, bfloat16. Format: ND, non-contiguous allowed.
6748
+ - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
6749
+ Format: ND, non-contiguous allowed.
6750
+ - **ep_world_size** (int64) - EP domain size.
6751
+ - Ascend 910B: Supports 16, 32, 64.
6752
+ - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
6753
+ - **ep_rank_id** (int64) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
6754
+ - **moe_expert_num** (int64) - Number of MoE experts (0 < moe_expert_num <= 256),
6755
+ must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
6756
+ - **expert_scales** (Tensor) - Top-K expert weights per token.
6757
+ - Ascend 910B: 2D float32 tensor [BS, K], ND format, non-contiguous allowed.
6758
+ - Ascend 910_93: Unsupported (pass ``None``).
6759
+ - **scales** (Tensor) - Expert weights. 2D float32 tensor with shape [shared_expert_num + moe_expert_num, H].
6760
+ Pass ``None`` for non-quantized scenarios. Format: ND, non-contiguous allowed.
6761
+ Note: On Ascend 910B, must be ``None`` when HCCL_INTRA_PCIE_ENABLE=1 and HCCL_INTRA_ROCE_ENABLE=0.
6762
+ - **x_active_mask** (Tensor) - Reserved parameter (pass ``None`` in current version).
6763
+ - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp.
6764
+ - **group_tp** (str) - TP communication domain name.
6765
+ - Ascend 910B: Unsupported (pass empty string).
6766
+ - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
6767
+ - **tp_world_size** (int64) - TP domain size.
6768
+ - Ascend 910B: Unsupported (pass 0).
6769
+ - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
6770
+ - **tp_rank_id** (int64) - Local rank ID in TP domain.
6771
+ - Ascend 910B: Unsupported (pass 0).
6772
+ - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
6773
+ - **expert_shard_type** (int64) - Shared expert distribution type.
6774
+ - Ascend 910B: Unsupported (pass 0).
6775
+ - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
6776
+ - **shared_expert_num** (int64) - Number of shared experts.
6777
+ - Ascend 910B: Unsupported (pass 0).
6778
+ - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
6779
+ - **shared_expert_rank_num** (int64) - Number of ranks hosting shared experts.
6780
+ - Ascend 910B: Unsupported (pass 0).
6781
+ - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
6782
+ - **quant_mode** (int64) - Quantization mode: 0 (none), 2 (dynamic quantization).
6783
+ - **global_bs** (int64) - Global batch size across EP domain.
6784
+ - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
6785
+ - Ascend 910_93: 0 or BS*ep_world_size.
6786
+ - **expert_token_nums_type** (int64) - Semantic meaning of expert_token_nums output:
6787
+ 0 (prefix sums), 1 (raw counts).
6788
+
6789
+ Outputs:
6790
+ - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
6791
+ Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
6792
+ - **dynamic_scales** (Tensor) - Dynamic quantization scales (when quant_mode=2).
6793
+ 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6794
+ - **expand_idx** (Tensor) - Token counts per expert for combine operation.
6795
+ 1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
6796
+ - **expert_token_nums** (Tensor) - Tokens received per expert.
6797
+ 1D int64 tensor [local_expert_num]. Format: ND, non-contiguous allowed.
6798
+ - **ep_recv_counts** (Tensor) - Tokens received from each EP rank.
6799
+ - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
6800
+ - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
6801
+ Format: ND, non-contiguous allowed.
6802
+ - **tp_recv_counts** (Tensor) - Tokens received from each TP rank (when TP exists).
6803
+ - Ascend 910B: Not supported.
6804
+ - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
6805
+ - **expand_scales** (Tensor) - Output token weights for combine operation.
6806
+ - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6807
+ - Ascend 910_93: Unsupported.
6808
+
6809
+ Raises:
6810
+ TypeError: If input dtypes don't match specifications.
6811
+ ValueError: If input values violate constraints (e.g., invalid expert indices).
6812
+ RuntimeError: If communication domain configuration is invalid.
6813
+
6814
+ Supported Platforms:
6815
+ ``Ascend``
6816
+
6817
+ Examples:
6818
+ >>> # EP-only communication example (Ascend 910B)
6819
+ >>> import mindspore as ms
6820
+ >>> from mindspore import Tensor
6821
+ >>> from mindspore import ops
6822
+ >>> from mindspore.communication import init, get_rank, GlobalComm
6823
+ >>> from mindspore.ops.auto_generate import moe_distribute_dispatch
6824
+ >>> import numpy as np
6825
+ >>> bs = 8
6826
+ >>> h = 7168
6827
+ >>> k = 8
6828
+ >>> ep_world_size = 16
6829
+ >>> moe_expert_num = 16
6830
+ >>> global_bs = bs * ep_world_size
6831
+ >>> x = Tensor(np.random.randn(bs, h), ms.float16)
6832
+ >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
6833
+ >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
6834
+ >>> init()
6835
+ >>> rank_id = get_rank()
6836
+ >>> out = moe_distribute_dispatch(
6837
+ ... x, expert_ids, ep_world_size, rank_id, moe_expert_num, expert_scales=expert_scales,
6838
+ ... group_ep=GlobalComm.WORLD_COMM_GROUP)
6839
+ >>> print(out[0].shape) # expand_x
6840
+ (128, 7168)
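+ >>> # Worked check of the A bound from the Notes above (this example has no shared-expert ranks):
+ >>> # local_expert_num = moe_expert_num / ep_world_size = 1, so A >= bs * ep_world_size * min(1, k) = 128,
+ >>> # which matches the expand_x shape printed above.
+ >>> local_expert_num = moe_expert_num // ep_world_size
+ >>> print(bs * ep_world_size * min(local_expert_num, k))
+ 128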
6841
+ """
6842
+ return moe_distribute_dispatch_op(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales, scales, x_active_mask, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_expert_rank_num, quant_mode, global_bs, expert_token_nums_type)
6843
+
6844
+
6498
6845
  def moe_token_permute_grad(permuted_tokens_grad, sorted_indices, num_topk=1, padded_mode=False):
6499
6846
  r"""
6500
6847
 
@@ -6508,11 +6855,10 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
6508
6855
 
6509
6856
  .. warning::
6510
6857
  - It is only supported on Atlas A2 Training Series Products.
6511
- - The input `tokens` only supports the bfloat16 data type in the current version.
6512
6858
  - When `indices` is 2-D, the size of the second dim must be less than or equal to 512.
6513
6859
 
6514
6860
  Args:
6515
- tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16.
6861
+ tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16, float16 or float32.
6516
6862
  The shape is :math:`(num\_tokens, hidden\_size)` , where `num_tokens` and `hidden_size` are positive integers.
6517
6863
  indices (Tensor): The tensor specifies indices used to permute the tokens. The dtype is int32 or int64.
6518
6864
  The shape is :math:`(num\_tokens, topk)` or :math:`(num\_tokens,)`, where `num_tokens` and `topk` are positive integers.
@@ -6528,7 +6874,6 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
6528
6874
 
6529
6875
  Raises:
6530
6876
  TypeError: If `tokens` or `indices` is not a Tensor.
6531
- TypeError: If dtype of `tokens` is not bfloat16.
6532
6877
  TypeError: If dtype of `indices` is not int32 or int64.
6533
6878
  TypeError: If specified `num_out_tokens` is not an integer.
6534
6879
  TypeError: If specified `padded_mode` is not a bool.
@@ -6570,60 +6915,6 @@ def moe_token_unpermute_grad(permuted_tokens, unpermuted_tokens_grad, sorted_ind
6570
6915
  return moe_token_unpermute_grad_op(permuted_tokens, unpermuted_tokens_grad, sorted_indices, probs, padded_mode, restore_shape)
6571
6916
 
6572
6917
 
6573
- def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
6574
- r"""
6575
- Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
6576
-
6577
- .. warning::
6578
- - It is only supported on Atlas A2 Training Series Products.
6579
- - The inputs `permuted_tokens` and `probs` only support the bfloat16 data type in the current version.
6580
- - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
6581
-
6582
- Args:
6583
- permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
6584
- The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
6585
- sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
6586
- The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
6587
- It only supports the int32 data type.
6588
- probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
6589
- If provided, the unpermuted tokens will be merged with their respective probabilities.
6590
- The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
6591
- padded_mode (bool, optional): If ``True``, indicating the indices are padded to denote selected tokens per expert. Default: ``False`` .
6592
- restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
6593
-
6594
- Returns:
6595
- Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
6596
- If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
6597
-
6598
- Raises:
6599
- TypeError: If `permuted_tokens` is not a Tensor.
6600
- ValueError: Only supported when `padded_mode` is ``False``.
6601
-
6602
- Supported Platforms:
6603
- ``Ascend``
6604
-
6605
- Examples:
6606
- >>> import mindspore
6607
- >>> from mindspore import Tensor, ops
6608
- >>> permuted_token = Tensor([
6609
- ... [1, 1, 1],
6610
- ... [0, 0, 0],
6611
- ... [0, 0, 0],
6612
- ... [3, 3, 3],
6613
- ... [2, 2, 2],
6614
- ... [1, 1, 1],
6615
- ... [2, 2, 2],
6616
- ... [3, 3, 3]], dtype=mindspore.bfloat16)
6617
- >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
6618
- >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
6619
- >>> out.shape
6620
- (8, 3)
6621
-
6622
-
6623
- """
6624
- return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
6625
-
6626
-
6627
6918
  def mse_loss_ext(input, target, reduction='mean'):
6628
6919
  r"""
6629
6920
  Calculates the mean squared error between the predicted value and the label value.
@@ -6746,10 +7037,10 @@ def mv(input, vec):
6746
7037
  Examples:
6747
7038
  >>> import mindspore
6748
7039
  >>> import numpy as np
6749
- >>> from mindspore import Tensor, mint
7040
+ >>> from mindspore import Tensor, ops
6750
7041
  >>> input = Tensor(np.array([[3., 4.], [1., 6.], [1., 3.]]).astype(np.float32))
6751
7042
  >>> vec = Tensor(np.array([1., 2.]).astype(np.float32))
6752
- >>> output = mint.mv(input, vec)
7043
+ >>> output = ops.auto_generate.mv(input, vec)
6753
7044
  >>> print(output)
6754
7045
  [11. 13. 7.]
6755
7046
  """
@@ -6822,14 +7113,14 @@ def narrow(input, dim, start, length):
6822
7113
 
6823
7114
  Examples:
6824
7115
  >>> import mindspore
6825
- >>> from mindspore import mint
7116
+ >>> from mindspore import ops
6826
7117
  >>> from mindspore import Tensor
6827
7118
  >>> x = Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], mindspore.int32)
6828
- >>> output = mint.narrow(x, 0, 0, 2)
7119
+ >>> output = ops.auto_generate.narrow(x, 0, 0, 2)
6829
7120
  >>> print(output)
6830
7121
  [[ 1 2 3]
6831
7122
  [ 4 5 6]]
6832
- >>> output = mint.narrow(x, 1, 1, 2)
7123
+ >>> output = ops.auto_generate.narrow(x, 1, 1, 2)
6833
7124
  >>> print(output)
6834
7125
  [[ 2 3]
6835
7126
  [ 5 6]
@@ -6838,6 +7129,47 @@ def narrow(input, dim, start, length):
6838
7129
  return narrow_op(input, dim, start, length)
6839
7130
 
6840
7131
 
7132
+ def narrow_view(input, dim, start, length):
7133
+ r"""
7134
+ Obtains a tensor of a specified length at a specified start position along a specified axis.
7135
+
7136
+ Args:
7137
+ input (Tensor): the tensor to narrow.
7138
+ dim (int): the axis along which to narrow.
7139
+ start (Union[int, Tensor[int]]): the index at which to start narrowing along `dim`.
7140
+ length (int): the length of the narrowed slice along `dim`.
7141
+
7142
+ Returns:
7143
+ output (Tensor) - The narrowed tensor.
7144
+
7145
+ Raises:
7146
+ ValueError: If the rank of `input` is 0.
7147
+ ValueError: If the value of `dim` is out of the range [-input.ndim, input.ndim).
7148
+ ValueError: If the value of `start` is out of the range [-input.shape[dim], input.shape[dim]].
7149
+ ValueError: If the value of `length` is out of the range [0, input.shape[dim]-start].
7150
+
7151
+ Supported Platforms:
7152
+ ``Ascend``
7153
+
7154
+ Examples:
7155
+ >>> import mindspore
7156
+ >>> from mindspore import ops
7157
+ >>> from mindspore.ops.auto_generate import NarrowView
7158
+ >>> from mindspore import Tensor
7159
+ >>> x = Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], mindspore.int32)
7160
+ >>> output = NarrowView()(x, 0, 0, 2)
7161
+ >>> print(output)
7162
+ [[ 1 2 3]
7163
+ [ 4 5 6]]
7164
+ >>> output = NarrowView()(x, 1, 1, 2)
7165
+ >>> print(output)
7166
+ [[ 2 3]
7167
+ [ 5 6]
7168
+ [ 8 9]]
7169
+ """
7170
+ return narrow_view_op(input, dim, start, length)
7171
+
7172
+
6841
7173
  def neg(input):
6842
7174
  r"""
6843
7175
  Returns a tensor with negative values of the input tensor element-wise.
@@ -7156,65 +7488,6 @@ def prod_ext(input, dim=None, keepdim=False, dtype=None):
7156
7488
  return prod_ext_op(input, dim, keepdim, dtype)
7157
7489
 
7158
7490
 
7159
- def prompt_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, align_mode='LEFT'):
7160
- r"""
7161
- The PromptKVCache is used for prefill the KVCache of transformer network.
7162
-
7163
- Args:
7164
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
7165
- When format is BHSD, cache tensor of shape
7166
- :math:`(cache\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
7167
- When format is BSD, cache tensor of shape
7168
- :math:`(cache\_batch\_size, max\_seq\_length, hidden\_size)`.
7169
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
7170
- When format is BHSD, cache tensor of shape
7171
- :math:`(update\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
7172
- When format is BSD, cache tensor of shape
7173
- :math:`(update\_batch\_size, max\_seq\_length, hidden\_size)`.
7174
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
7175
- Valid_seq_len tensor of shape :math:`(update\_batch\_size)`.
7176
- batch_index (Tensor): The batch_index tensor with data type of int64.
7177
- Batch_index tensor of shape :math:`(update\_batch\_size)`. Indicate that which batch of cache tensor is going to be update.
7178
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
7179
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
7180
- New_max_seq_len tensor of shape :math:`(1)`.
7181
- Indicate that user want to change the shape of cache tensor from
7182
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
7183
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
7184
- to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
7185
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
7186
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
7187
- align_mode (str): indicate which axis is seq_len. Default: left.
7188
-
7189
-
7190
- Outputs:
7191
- With same data type and same shape as `cache` tensor.
7192
-
7193
- Supported Platforms:
7194
- ``Ascend``
7195
-
7196
- Examples:
7197
- >>> from mindspore import Tensor
7198
- >>> from mindspore.ops.operations import _inner_ops
7199
- >>> b = 4
7200
- >>> h = 40
7201
- >>> max_s = 1024
7202
- >>> s = 256
7203
- >>> d = 128
7204
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
7205
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
7206
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=ub).astype(np.int64))
7207
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=ub, replace=False).astype(np.int64))
7208
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
7209
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
7210
- >>> prompt_kv_cache = _inner_ops.PromptKVCache(0)
7211
- >>> output = prompt_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
7212
- >>> print(cache)
7213
- """
7214
- prompt_k_v_cache_op = _get_cache_prim(PromptKVCache)(align_mode)
7215
- return prompt_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
7216
-
7217
-
7218
7491
  def randperm(n, seed=0, offset=0, dtype=mstype.int64):
7219
7492
  r"""
7220
7493
  Generates random permutation of integers from 0 to n-1.
@@ -7550,7 +7823,7 @@ def rfft2(input, s=None, dim=(-2, -1), norm=None):
7550
7823
 
7551
7824
  Note:
7552
7825
  - `rfft2` is currently only used in `mindscience` scientific computing scenarios and
7553
- dose not support other usage scenarios.
7826
+ does not support other usage scenarios.
7554
7827
  - `rfft2` is not supported on Windows platform yet.
7555
7828
 
7556
7829
  Args:
@@ -7611,7 +7884,7 @@ def rfftfreq(n, d=1.0, dtype=None):
7611
7884
 
7612
7885
  Note:
7613
7886
  - `rfftfreq` is currently only used in `mindscience` scientific computing scenarios and
7614
- dose not support other usage scenarios.
7887
+ does not support other usage scenarios.
7615
7888
  - `rfftfreq` is not supported on Windows platform yet.
7616
7889
 
7617
7890
  Args:
@@ -7644,7 +7917,7 @@ def rfftn(input, s=None, dim=None, norm=None):
7644
7917
 
7645
7918
  Note:
7646
7919
  - `rfftn` is currently only used in `mindscience` scientific computing scenarios and
7647
- dose not support other usage scenarios.
7920
+ does not support other usage scenarios.
7648
7921
  - `rfftn` is not supported on Windows platform yet.
7649
7922
 
7650
7923
  Args:
@@ -7704,7 +7977,7 @@ def rfft(input, n=None, dim=-1, norm=None):
7704
7977
 
7705
7978
  Note:
7706
7979
  - `rfft` is currently only used in `mindscience` scientific computing scenarios and
7707
- dose not support other usage scenarios.
7980
+ does not support other usage scenarios.
7708
7981
  - `rfft` is not supported on Windows platform yet.
7709
7982
 
7710
7983
  Args:
@@ -8077,7 +8350,7 @@ def select_v2(condition, input, other):
8077
8350
  return select_v2_op(condition, input, other)
8078
8351
 
8079
8352
 
8080
- def select_ext(input, dim, index):
8353
+ def select_ext_view(input, dim, index):
8081
8354
  r"""
8082
8355
  Slices the input tensor along the selected dimension at the given index.
8083
8356
 
@@ -8099,15 +8372,14 @@ def select_ext(input, dim, index):
8099
8372
  ``Ascend``
8100
8373
 
8101
8374
  Examples:
8102
- >>> import mindspore
8103
- >>> from mindspore import Tensor, mint
8375
+ >>> from mindspore import Tensor, ops
8104
8376
  >>> input = Tensor([[2, 3, 4, 5],[3, 2, 4, 5]])
8105
- >>> y = mint.select(input, 0, 0)
8377
+ >>> y = ops.auto_generate.select_ext_view(input, 0, 0)
8106
8378
  >>> print(y)
8107
8379
  [2 3 4 5]
8108
8380
 
8109
8381
  """
8110
- return select_ext_op(input, dim, index)
8382
+ return select_ext_view_op(input, dim, index)
8111
8383
 
8112
8384
 
8113
8385
  def select(condition, input, other):
@@ -8195,13 +8467,13 @@ def selu_ext(input):
8195
8467
 
8196
8468
  Examples:
8197
8469
  >>> import mindspore
8198
- >>> from mindspore import Tensor, mint
8470
+ >>> from mindspore import Tensor, ops
8199
8471
  >>> import numpy as np
8200
8472
  >>> input = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
8201
- >>> output = mint.nn.functional.selu(input)
8473
+ >>> output = ops.auto_generate.selu_ext(input)
8202
8474
  >>> print(output)
8203
8475
  [[-1.1113307 4.202804 -1.7575096]
8204
- [ 2.101402 -1.7462534 9.456309 ]]
8476
+ [ 2.101402 -1.7462534 9.456309 ]]
8205
8477
  """
8206
8478
  return selu_ext_op(input)
8207
8479
 
@@ -8634,7 +8906,7 @@ def solve_triangular(a, b, trans=0, lower=False, unit_diagonal=False):
8634
8906
 
8635
8907
  Note:
8636
8908
  - `solve_triangular` is currently only used in `mindscience` scientific computing scenarios and
8637
- dose not support other usage scenarios.
8909
+ does not support other usage scenarios.
8638
8910
  - `solve_triangular` is not supported on Windows platform yet.
8639
8911
 
8640
8912
  Args:
@@ -9065,9 +9337,6 @@ def swiglu(input, dim=-1):
9065
9337
  Computes SwiGLU (Swish-Gated Linear Unit activation function) of input tensor.
9066
9338
  SwiGLU is a variant of the :class:`mindspore.ops.GLU` activation function, it is defined as:
9067
9339
 
9068
- .. warning::
9069
- This is an experimental API that is subject to change or deletion.
9070
-
9071
9340
  .. math::
9072
9341
  {SwiGLU}(a, b)= Swish(a) \otimes b
9073
9342
 
@@ -9075,6 +9344,9 @@ def swiglu(input, dim=-1):
9075
9344
  Swish(a)=a :math:`\sigma` (a), :math:`\sigma` is the :func:`mindspore.ops.sigmoid` activation function
9076
9345
  and :math:`\otimes` is the Hadamard product.
9077
9346
 
9347
+ .. warning::
9348
+ Only supported on Atlas A2 Training Series Products.
9349
+
9078
9350
  Args:
9079
9351
  input (Tensor): Tensor to be split. It has shape :math:`(\ast_1, N, \ast_2)`
9080
9352
  where `*` means, any number of additional dimensions. :math:`N` must be divisible by 2.
@@ -9307,6 +9579,30 @@ def topk_ext(input, k, dim=-1, largest=True, sorted=True):
9307
9579
  return topk_ext_op(input, k, dim, largest, sorted)
9308
9580
 
9309
9581
 
9582
+ def topprouter(input, capacity, expert_num, drop_type=0, threshold=0.0, router_prob=0.0):
9583
+ r"""
9584
+ TopPRouter implementation in MoE (Mixture of Experts).
9585
+
9586
+ Inputs:
9587
+ - **input** (Tensor) - 3-D input Tensor, supporting dtypes: [int32, int64].
9588
+ - **capacity** (Int64) - The maximum number of tokens each expert can handle.
9589
+ - **expert_num** (Int64) - The number of experts.
9590
+ - **drop_type** (Int64) - S-Drop/K-Drop, 0 means S-Drop, 1 means K-Drop, default 0.
9591
+ - **threshold** (float32) - Expert threshold, default 0.
9592
+ - **router_prob** (Tensor) - 2-D top-k probability Tensor, supporting dtypes: [float32]. Default: ``0.0``.
9593
+
9594
+ Outputs:
9595
+ tuple(Tensor), tuple of 2 tensors, `dispatch_index` and `combine_index`.
9596
+
9597
+ - dispatch_index (Tensor) - Token ID processed by each expert.
9598
+ - combine_index (Tensor) - The combine index of each token.
9599
+
9600
+ Supported Platforms:
9601
+ ``Ascend``
9602
+ """
9603
+ return topprouter_op(input, capacity, expert_num, drop_type, threshold, router_prob)
9604
+
9605
+
9310
9606
  def trace_ext(input):
9311
9607
  r"""
9312
9608
  Returns a new tensor that is the sum of the `input` main trace.
@@ -9372,7 +9668,7 @@ def trace(input):
9372
9668
  return trace_op(input)
9373
9669
 
9374
9670
 
9375
- def transpose_ext(input, dim0, dim1):
9671
+ def transpose_ext_view(input, dim0, dim1):
9376
9672
  r"""
9377
9673
  Interchange two axes of a tensor.
9378
9674
 
@@ -9397,14 +9693,13 @@ def transpose_ext(input, dim0, dim1):
9397
9693
 
9398
9694
  Examples:
9399
9695
  >>> import numpy as np
9400
- >>> from mindspore import mint
9401
- >>> from mindspore import Tensor
9402
- >>> input = Tensor(np.ones((2,3,4), dtype=np.float32))
9403
- >>> output = mint.transpose(input, 0, 2)
9696
+ >>> from mindspore import Tensor, ops
9697
+ >>> input = Tensor(np.ones((2, 3, 4), dtype=np.float32))
9698
+ >>> output = ops.auto_generate.transpose_ext_view(input, 0, 2)
9404
9699
  >>> print(output.shape)
9405
9700
  (4, 3, 2)
9406
9701
  """
9407
- return transpose_ext_op(input, dim0, dim1)
9702
+ return transpose_ext_view_op(input, dim0, dim1)
9408
9703
 
9409
9704
 
9410
9705
  def transpose(input, input_perm):
@@ -9440,6 +9735,57 @@ def transpose(input, input_perm):
9440
9735
  return transpose_op(input, input_perm)
9441
9736
 
9442
9737
 
9738
+ def transpose_view(input, input_perm):
9739
+ r"""
9740
+ Permutes the dimensions of the input tensor according to input permutation.
9741
+
9742
+ For a 1-D array this has no effect, as a transposed vector is simply the same vector.
9743
+ To convert a 1-D array into a 2D column vector please refer to :func:`mindspore.ops.expand_dims`.
9744
+ For a 2-D array, this is a standard matrix transpose. For an n-D array, if axes are given,
9745
+ their order indicates how the axes are permuted (see Examples).
9746
+ If axes are not provided and a.shape is :math:`(i[0], i[1], ... i[n-2], i[n-1])`,
9747
+ then a.transpose().shape is :math:`(i[n-1], i[n-2], ... i[1], i[0])`.
9748
+
9749
+ Note:
9750
+ On GPU and CPU, if the value of `input_perm` is negative, its actual value is `input_perm[i] + rank(input)`.
9751
+ Negative value of `input_perm` is not supported on Ascend.
9752
+
9753
+ Args:
9754
+ input (Tensor): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
9755
+ input_perm (tuple[int]): The permutation to be converted. The elements in `input_perm` are composed of
9756
+ the indexes of each dimension of `input`. The length of `input_perm` and the shape of `input` must be
9757
+ the same. Only constant value is allowed. Must be in the range [-rank(input), rank(input)).
9758
+
9759
+ Returns:
9760
+ Tensor, the type of output tensor is the same as `input` and the shape of output tensor is decided by the
9761
+ shape of `input` and the value of `input_perm`.
9762
+
9763
+ Raises:
9764
+ TypeError: If `input_perm` is not a tuple.
9765
+ ValueError: If length of shape of `input` is not equal to length of shape of `input_perm`.
9766
+ ValueError: If the same element exists in `input_perm`.
9767
+
9768
+ Supported Platforms:
9769
+ ``Ascend``
9770
+
9771
+ Examples:
9772
+ >>> import mindspore
9773
+ >>> import numpy as np
9774
+ >>> from mindspore import Tensor, ops
9775
+ >>> input = Tensor(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]), mindspore.float32)
9776
+ >>> input_perm = (0, 2, 1)
9777
+ >>> output = ops.TransposeView()(input, input_perm)
9778
+ >>> print(output)
9779
+ [[[ 1. 4.]
9780
+ [ 2. 5.]
9781
+ [ 3. 6.]]
9782
+ [[ 7. 10.]
9783
+ [ 8. 11.]
9784
+ [ 9. 12.]]]
9785
+ """
9786
+ return transpose_view_op(input, input_perm)
9787
+
9788
+
9443
9789
  def triangular_solve(b, A, upper=True, transpose=False, unitriangular=False):
9444
9790
  r"""
9445
9791
  Solves a system of equations with a square upper or lower triangular invertible matrix `A` and multiple right-hand sides `b`.
@@ -10278,8 +10624,68 @@ def quant_batch_matmul(x1, x2, scale, offset=None, bias=None, pertokenScaleOptio
10278
10624
  return quant_batch_matmul_impl(x1, x2, scale, offset, bias, pertokenScaleOptional, transpose_x1, transpose_x2, dtype)
10279
10625
 
10280
10626
 
10627
+ def quant_matmul(x1, x2, scale, offset=None, pertoken_scale=None, bias=None, output_dtype=None, x1_dtype=None, x2_dtype=None, pertoken_scale_dtype=None, scale_dtype=None, group_sizes=None):
10628
+ r"""
10629
+
10630
+ """
10631
+ return quant_matmul_op(x1, x2, scale, offset, pertoken_scale, bias, output_dtype, x1_dtype, x2_dtype, pertoken_scale_dtype, scale_dtype, group_sizes)
10632
+
10633
+
10281
10634
  def weight_quant_batch_matmul(x, weight, antiquant_scale, antiquant_offset=None, quant_scale=None, quant_offset=None, bias=None, transpose_x=False, transpose_weight=False, antiquant_group_size=0):
10282
10635
  r"""
10283
10636
 
10284
10637
  """
10285
10638
  return weight_quant_batch_matmul_impl(x, weight, antiquant_scale, antiquant_offset, quant_scale, quant_offset, bias, transpose_x, transpose_weight, antiquant_group_size)
10639
+
10640
+
10641
+ def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
10642
+ r"""
10643
+ Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
10644
+
10645
+ .. warning::
10646
+ - It is only supported on Atlas A2 Training Series Products.
10647
+ - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
10648
+
10649
+ Args:
10650
+ permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
10651
+ The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
10652
+ sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
10653
+ The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
10654
+ It only supports the int32 data type.
10655
+ probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
10656
+ If provided, the unpermuted tokens will be merged with their respective probabilities.
10657
+ The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
10658
+ padded_mode (bool, optional): If ``True``, indicating the indices are padded to denote selected tokens per expert. Default: ``False`` .
10659
+ restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
10660
+
10661
+ Returns:
10662
+ Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
10663
+ If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
10664
+
10665
+ Raises:
10666
+ TypeError: If `permuted_tokens` is not a Tensor.
10667
+ ValueError: Only supported when `padded_mode` is ``False``.
10668
+
10669
+ Supported Platforms:
10670
+ ``Ascend``
10671
+
10672
+ Examples:
10673
+ >>> import mindspore
10674
+ >>> from mindspore import Tensor, ops
10675
+ >>> permuted_token = Tensor([
10676
+ ... [1, 1, 1],
10677
+ ... [0, 0, 0],
10678
+ ... [0, 0, 0],
10679
+ ... [3, 3, 3],
10680
+ ... [2, 2, 2],
10681
+ ... [1, 1, 1],
10682
+ ... [2, 2, 2],
10683
+ ... [3, 3, 3]], dtype=mindspore.bfloat16)
10684
+ >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
10685
+ >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
10686
+ >>> out.shape
10687
+ (8, 3)
10688
+
10689
+
10690
+ """
10691
+ return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)