mindspore 2.4.10__cp310-cp310-win_amd64.whl → 2.6.0rc1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (602)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +13 -6
  5. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  8. mindspore/_check_jit_forbidden_api.py +3 -0
  9. mindspore/_checkparam.py +3 -38
  10. mindspore/_deprecated/__init__.py +17 -0
  11. mindspore/_deprecated/jit.py +198 -0
  12. mindspore/_extends/builtin_operations.py +1 -1
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +6 -7
  15. mindspore/_extends/parse/compile_config.py +83 -0
  16. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  17. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
  18. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  19. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  20. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  21. mindspore/_extends/parse/parser.py +46 -197
  22. mindspore/_extends/parse/resources.py +1 -5
  23. mindspore/_extends/parse/standard_method.py +217 -98
  24. mindspore/_extends/pijit/__init__.py +2 -2
  25. mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
  26. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  27. mindspore/_extends/utils.py +1 -1
  28. mindspore/amp.py +11 -5
  29. mindspore/atlprov.dll +0 -0
  30. mindspore/avcodec-59.dll +0 -0
  31. mindspore/avdevice-59.dll +0 -0
  32. mindspore/avfilter-8.dll +0 -0
  33. mindspore/avformat-59.dll +0 -0
  34. mindspore/avutil-57.dll +0 -0
  35. mindspore/boost/__init__.py +2 -2
  36. mindspore/boost/base.py +3 -7
  37. mindspore/boost/boost_cell_wrapper.py +138 -43
  38. mindspore/c1.dll +0 -0
  39. mindspore/c1xx.dll +0 -0
  40. mindspore/c2.dll +0 -0
  41. mindspore/common/__init__.py +6 -3
  42. mindspore/common/_grad_function.py +56 -0
  43. mindspore/common/_pijit_context.py +14 -5
  44. mindspore/common/_register_for_tensor.py +1 -2
  45. mindspore/common/_stub_tensor.py +30 -14
  46. mindspore/common/_tensor_cpp_method.py +17 -0
  47. mindspore/common/_tensor_docs.py +4760 -0
  48. mindspore/common/api.py +435 -371
  49. mindspore/common/auto_dynamic_shape.py +41 -44
  50. mindspore/common/dtype.py +39 -36
  51. mindspore/common/dump.py +9 -6
  52. mindspore/common/file_system.py +9 -1
  53. mindspore/common/generator.py +2 -0
  54. mindspore/common/hook_handle.py +6 -2
  55. mindspore/common/initializer.py +13 -10
  56. mindspore/common/jit_begin_end.py +94 -0
  57. mindspore/common/jit_config.py +6 -1
  58. mindspore/common/jit_context.py +76 -0
  59. mindspore/common/jit_trace.py +378 -0
  60. mindspore/common/lazy_inline.py +9 -3
  61. mindspore/common/mindir_util.py +10 -2
  62. mindspore/common/mutable.py +5 -4
  63. mindspore/common/parameter.py +135 -52
  64. mindspore/common/seed.py +2 -2
  65. mindspore/common/sparse_tensor.py +23 -17
  66. mindspore/common/tensor.py +951 -1992
  67. mindspore/communication/__init__.py +7 -5
  68. mindspore/communication/_comm_helper.py +52 -2
  69. mindspore/communication/comm_func.py +240 -181
  70. mindspore/communication/management.py +95 -26
  71. mindspore/context.py +314 -566
  72. mindspore/dataset/__init__.py +65 -37
  73. mindspore/dataset/audio/__init__.py +2 -8
  74. mindspore/dataset/audio/transforms.py +3 -17
  75. mindspore/dataset/callback/ds_callback.py +2 -1
  76. mindspore/dataset/core/config.py +87 -6
  77. mindspore/dataset/engine/cache_admin.py +3 -3
  78. mindspore/dataset/engine/cache_client.py +6 -5
  79. mindspore/dataset/engine/datasets.py +292 -267
  80. mindspore/dataset/engine/datasets_audio.py +22 -8
  81. mindspore/dataset/engine/datasets_standard_format.py +46 -27
  82. mindspore/dataset/engine/datasets_text.py +78 -48
  83. mindspore/dataset/engine/datasets_user_defined.py +182 -116
  84. mindspore/dataset/engine/datasets_vision.py +120 -44
  85. mindspore/dataset/engine/iterators.py +283 -63
  86. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  87. mindspore/dataset/engine/obs/util.py +8 -0
  88. mindspore/dataset/engine/queue.py +40 -0
  89. mindspore/dataset/engine/samplers.py +289 -43
  90. mindspore/dataset/engine/serializer_deserializer.py +3 -2
  91. mindspore/dataset/engine/validators.py +53 -11
  92. mindspore/dataset/text/__init__.py +7 -6
  93. mindspore/dataset/text/transforms.py +6 -5
  94. mindspore/dataset/text/utils.py +3 -3
  95. mindspore/dataset/transforms/__init__.py +0 -9
  96. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  97. mindspore/dataset/transforms/transforms.py +31 -14
  98. mindspore/dataset/utils/browse_dataset.py +1 -1
  99. mindspore/dataset/vision/__init__.py +2 -9
  100. mindspore/dataset/vision/transforms.py +202 -158
  101. mindspore/dataset/vision/utils.py +7 -5
  102. mindspore/dataset/vision/validators.py +1 -2
  103. mindspore/device_context/__init__.py +21 -0
  104. mindspore/device_context/ascend/__init__.py +25 -0
  105. mindspore/device_context/ascend/device.py +72 -0
  106. mindspore/device_context/ascend/op_debug.py +153 -0
  107. mindspore/device_context/ascend/op_precision.py +193 -0
  108. mindspore/device_context/ascend/op_tuning.py +123 -0
  109. mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
  110. mindspore/device_context/cpu/device.py +62 -0
  111. mindspore/device_context/cpu/op_tuning.py +43 -0
  112. mindspore/device_context/gpu/__init__.py +21 -0
  113. mindspore/device_context/gpu/device.py +70 -0
  114. mindspore/device_context/gpu/op_precision.py +67 -0
  115. mindspore/device_context/gpu/op_tuning.py +175 -0
  116. mindspore/device_manager.py +170 -0
  117. mindspore/dnnl.dll +0 -0
  118. mindspore/dpcmi.dll +0 -0
  119. mindspore/experimental/es/embedding_service.py +35 -27
  120. mindspore/experimental/llm_boost/__init__.py +1 -0
  121. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  122. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  123. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  124. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  125. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  126. mindspore/experimental/llm_boost/register.py +1 -0
  127. mindspore/experimental/map_parameter.py +4 -4
  128. mindspore/experimental/optim/adadelta.py +6 -6
  129. mindspore/experimental/optim/adagrad.py +4 -4
  130. mindspore/experimental/optim/adam.py +7 -0
  131. mindspore/experimental/optim/adamax.py +4 -4
  132. mindspore/experimental/optim/adamw.py +4 -0
  133. mindspore/experimental/optim/asgd.py +1 -1
  134. mindspore/experimental/optim/lr_scheduler.py +73 -46
  135. mindspore/experimental/optim/radam.py +34 -31
  136. mindspore/experimental/optim/rprop.py +1 -1
  137. mindspore/experimental/optim/sgd.py +1 -1
  138. mindspore/hal/contiguous_tensors_handle.py +6 -10
  139. mindspore/hal/device.py +55 -53
  140. mindspore/hal/event.py +52 -52
  141. mindspore/hal/memory.py +157 -117
  142. mindspore/hal/stream.py +150 -109
  143. mindspore/include/api/context.h +0 -1
  144. mindspore/include/dataset/constants.h +7 -4
  145. mindspore/include/dataset/execute.h +2 -2
  146. mindspore/jpeg62.dll +0 -0
  147. mindspore/log.py +50 -0
  148. mindspore/mindrecord/__init__.py +21 -8
  149. mindspore/mindrecord/config.py +17 -316
  150. mindspore/mindrecord/filereader.py +1 -9
  151. mindspore/mindrecord/filewriter.py +5 -15
  152. mindspore/mindrecord/mindpage.py +1 -9
  153. mindspore/mindspore_backend_common.dll +0 -0
  154. mindspore/mindspore_backend_manager.dll +0 -0
  155. mindspore/mindspore_common.dll +0 -0
  156. mindspore/mindspore_core.dll +0 -0
  157. mindspore/mindspore_dump.dll +0 -0
  158. mindspore/mindspore_frontend.dll +0 -0
  159. mindspore/mindspore_glog.dll +0 -0
  160. mindspore/mindspore_memory_pool.dll +0 -0
  161. mindspore/mindspore_ms_backend.dll +0 -0
  162. mindspore/mindspore_ops.dll +0 -0
  163. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  164. mindspore/mindspore_ops_kernel_common.dll +0 -0
  165. mindspore/mindspore_profiler.dll +0 -0
  166. mindspore/mindspore_pyboost.dll +0 -0
  167. mindspore/mindspore_pynative.dll +0 -0
  168. mindspore/mindspore_res_manager.dll +0 -0
  169. mindspore/mindspore_runtime_pipeline.dll +0 -0
  170. mindspore/mint/__init__.py +796 -759
  171. mindspore/mint/distributed/__init__.py +70 -4
  172. mindspore/mint/distributed/distributed.py +2679 -44
  173. mindspore/mint/linalg/__init__.py +8 -0
  174. mindspore/mint/nn/__init__.py +743 -22
  175. mindspore/mint/nn/functional.py +716 -23
  176. mindspore/mint/nn/layer/__init__.py +21 -4
  177. mindspore/mint/nn/layer/_functions.py +334 -0
  178. mindspore/mint/nn/layer/activation.py +276 -1
  179. mindspore/mint/nn/layer/basic.py +123 -0
  180. mindspore/mint/nn/layer/conv.py +921 -0
  181. mindspore/mint/nn/layer/normalization.py +223 -28
  182. mindspore/mint/nn/layer/padding.py +797 -0
  183. mindspore/mint/nn/layer/pooling.py +235 -0
  184. mindspore/mint/optim/__init__.py +3 -1
  185. mindspore/mint/optim/adam.py +223 -0
  186. mindspore/mint/optim/adamw.py +26 -19
  187. mindspore/mint/optim/sgd.py +171 -0
  188. mindspore/mint/special/__init__.py +2 -1
  189. mindspore/msobj140.dll +0 -0
  190. mindspore/mspdb140.dll +0 -0
  191. mindspore/mspdbcore.dll +0 -0
  192. mindspore/mspdbst.dll +0 -0
  193. mindspore/mspft140.dll +0 -0
  194. mindspore/msvcdis140.dll +0 -0
  195. mindspore/msvcp140_1.dll +0 -0
  196. mindspore/msvcp140_2.dll +0 -0
  197. mindspore/msvcp140_atomic_wait.dll +0 -0
  198. mindspore/msvcp140_codecvt_ids.dll +0 -0
  199. mindspore/multiprocessing/__init__.py +5 -0
  200. mindspore/nn/__init__.py +4 -1
  201. mindspore/nn/cell.py +1370 -189
  202. mindspore/nn/dynamic_lr.py +2 -1
  203. mindspore/nn/layer/activation.py +29 -27
  204. mindspore/nn/layer/basic.py +51 -35
  205. mindspore/nn/layer/channel_shuffle.py +3 -3
  206. mindspore/nn/layer/container.py +1 -1
  207. mindspore/nn/layer/conv.py +22 -17
  208. mindspore/nn/layer/embedding.py +12 -11
  209. mindspore/nn/layer/normalization.py +56 -49
  210. mindspore/nn/layer/padding.py +4 -3
  211. mindspore/nn/layer/pooling.py +120 -42
  212. mindspore/nn/layer/rnn_cells.py +1 -1
  213. mindspore/nn/layer/rnns.py +2 -1
  214. mindspore/nn/layer/timedistributed.py +5 -5
  215. mindspore/nn/layer/transformer.py +59 -36
  216. mindspore/nn/learning_rate_schedule.py +8 -4
  217. mindspore/nn/loss/loss.py +58 -55
  218. mindspore/nn/optim/ada_grad.py +7 -5
  219. mindspore/nn/optim/adadelta.py +11 -9
  220. mindspore/nn/optim/adafactor.py +1 -1
  221. mindspore/nn/optim/adam.py +17 -13
  222. mindspore/nn/optim/adamax.py +8 -7
  223. mindspore/nn/optim/adasum.py +5 -5
  224. mindspore/nn/optim/asgd.py +1 -1
  225. mindspore/nn/optim/ftrl.py +11 -9
  226. mindspore/nn/optim/lamb.py +1 -1
  227. mindspore/nn/optim/lars.py +1 -4
  228. mindspore/nn/optim/lazyadam.py +12 -10
  229. mindspore/nn/optim/momentum.py +7 -6
  230. mindspore/nn/optim/optimizer.py +3 -3
  231. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  232. mindspore/nn/optim/rmsprop.py +13 -12
  233. mindspore/nn/optim/rprop.py +11 -9
  234. mindspore/nn/optim/sgd.py +9 -6
  235. mindspore/nn/optim/tft_wrapper.py +5 -2
  236. mindspore/nn/optim/thor.py +2 -1
  237. mindspore/nn/probability/bijector/bijector.py +17 -11
  238. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  239. mindspore/nn/probability/bijector/invert.py +2 -2
  240. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  241. mindspore/nn/probability/bijector/softplus.py +3 -2
  242. mindspore/nn/probability/distribution/beta.py +3 -3
  243. mindspore/nn/probability/distribution/categorical.py +1 -1
  244. mindspore/nn/probability/distribution/cauchy.py +4 -2
  245. mindspore/nn/probability/distribution/exponential.py +6 -7
  246. mindspore/nn/probability/distribution/gamma.py +2 -2
  247. mindspore/nn/probability/distribution/gumbel.py +2 -2
  248. mindspore/nn/probability/distribution/half_normal.py +5 -3
  249. mindspore/nn/probability/distribution/logistic.py +5 -3
  250. mindspore/nn/probability/distribution/poisson.py +1 -1
  251. mindspore/nn/probability/distribution/uniform.py +5 -3
  252. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  253. mindspore/nn/reinforcement/tensor_array.py +1 -1
  254. mindspore/nn/utils/init.py +13 -11
  255. mindspore/nn/wrap/__init__.py +6 -6
  256. mindspore/nn/wrap/cell_wrapper.py +181 -122
  257. mindspore/nn/wrap/grad_reducer.py +45 -36
  258. mindspore/nn/wrap/loss_scale.py +6 -7
  259. mindspore/numpy/array_creations.py +63 -65
  260. mindspore/numpy/array_ops.py +149 -144
  261. mindspore/numpy/logic_ops.py +41 -42
  262. mindspore/numpy/math_ops.py +365 -363
  263. mindspore/numpy/utils.py +17 -18
  264. mindspore/numpy/utils_const.py +5 -6
  265. mindspore/opencv_core452.dll +0 -0
  266. mindspore/opencv_imgcodecs452.dll +0 -0
  267. mindspore/opencv_imgproc452.dll +0 -0
  268. mindspore/ops/__init__.py +5 -3
  269. mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
  270. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
  271. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  272. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  273. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  274. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  275. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  276. mindspore/ops/_register_for_op.py +0 -11
  277. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  278. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
  279. mindspore/ops/_vmap/vmap_array_ops.py +27 -25
  280. mindspore/ops/_vmap/vmap_base.py +0 -2
  281. mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
  282. mindspore/ops/_vmap/vmap_math_ops.py +15 -16
  283. mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
  284. mindspore/ops/auto_generate/__init__.py +4 -3
  285. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
  286. mindspore/ops/auto_generate/gen_extend_func.py +764 -124
  287. mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
  288. mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
  289. mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
  290. mindspore/ops/composite/__init__.py +2 -1
  291. mindspore/ops/composite/base.py +20 -25
  292. mindspore/ops/composite/math_ops.py +6 -16
  293. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  294. mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
  295. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  296. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  297. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  298. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  299. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  300. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  301. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  302. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  303. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  304. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  305. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  306. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  307. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  308. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  309. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  310. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  311. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  312. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  313. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  314. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  315. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  316. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  317. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  318. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  319. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  320. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
  321. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  322. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  323. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  324. mindspore/ops/function/__init__.py +40 -2
  325. mindspore/ops/function/_add_attr_func.py +58 -0
  326. mindspore/ops/function/array_func.py +2089 -2403
  327. mindspore/ops/function/clip_func.py +80 -23
  328. mindspore/ops/function/debug_func.py +57 -57
  329. mindspore/ops/function/grad/__init__.py +1 -0
  330. mindspore/ops/function/grad/grad_func.py +104 -71
  331. mindspore/ops/function/image_func.py +2 -2
  332. mindspore/ops/function/linalg_func.py +47 -78
  333. mindspore/ops/function/math_func.py +4501 -3802
  334. mindspore/ops/function/nn_func.py +1726 -620
  335. mindspore/ops/function/other_func.py +159 -1
  336. mindspore/ops/function/parameter_func.py +18 -84
  337. mindspore/ops/function/random_func.py +440 -387
  338. mindspore/ops/function/reshard_func.py +4 -70
  339. mindspore/ops/function/sparse_func.py +3 -3
  340. mindspore/ops/function/sparse_unary_func.py +6 -6
  341. mindspore/ops/function/spectral_func.py +25 -58
  342. mindspore/ops/function/vmap_func.py +24 -17
  343. mindspore/ops/functional.py +22 -7
  344. mindspore/ops/functional_overload.py +1440 -0
  345. mindspore/ops/op_info_register.py +32 -244
  346. mindspore/ops/operations/__init__.py +13 -7
  347. mindspore/ops/operations/_custom_ops_utils.py +247 -0
  348. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  349. mindspore/ops/operations/_grad_ops.py +2 -43
  350. mindspore/ops/operations/_infer_ops.py +2 -1
  351. mindspore/ops/operations/_inner_ops.py +43 -84
  352. mindspore/ops/operations/_ms_kernel.py +4 -10
  353. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  354. mindspore/ops/operations/_scalar_ops.py +3 -2
  355. mindspore/ops/operations/_sequence_ops.py +1 -1
  356. mindspore/ops/operations/_tensor_array.py +1 -1
  357. mindspore/ops/operations/array_ops.py +81 -324
  358. mindspore/ops/operations/comm_ops.py +154 -108
  359. mindspore/ops/operations/custom_ops.py +232 -78
  360. mindspore/ops/operations/debug_ops.py +153 -59
  361. mindspore/ops/operations/inner_ops.py +7 -5
  362. mindspore/ops/operations/linalg_ops.py +1 -57
  363. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  364. mindspore/ops/operations/manually_defined/ops_def.py +928 -180
  365. mindspore/ops/operations/math_ops.py +32 -234
  366. mindspore/ops/operations/nn_ops.py +210 -498
  367. mindspore/ops/operations/other_ops.py +62 -9
  368. mindspore/ops/operations/random_ops.py +13 -7
  369. mindspore/ops/operations/reshard_ops.py +1 -1
  370. mindspore/ops/operations/sparse_ops.py +2 -2
  371. mindspore/ops/primitive.py +66 -53
  372. mindspore/ops/tensor_method.py +1888 -0
  373. mindspore/ops_generate/__init__.py +0 -5
  374. mindspore/ops_generate/aclnn/__init__.py +0 -0
  375. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
  376. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
  377. mindspore/ops_generate/api/__init__.py +0 -0
  378. mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
  379. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
  380. mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
  381. mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
  382. mindspore/ops_generate/api/functions_cc_generator.py +237 -0
  383. mindspore/ops_generate/api/gen_api.py +103 -0
  384. mindspore/ops_generate/api/op_api_proto.py +235 -0
  385. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
  386. mindspore/ops_generate/common/__init__.py +0 -0
  387. mindspore/ops_generate/common/base_generator.py +11 -0
  388. mindspore/ops_generate/common/gen_constants.py +91 -0
  389. mindspore/ops_generate/common/gen_utils.py +348 -0
  390. mindspore/ops_generate/common/op_proto.py +473 -0
  391. mindspore/ops_generate/common/template.py +523 -0
  392. mindspore/ops_generate/gen_ops.py +22 -1069
  393. mindspore/ops_generate/op_def/__init__.py +0 -0
  394. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  395. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
  396. mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
  397. mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
  398. mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
  399. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  400. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  401. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  402. mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
  403. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
  404. mindspore/ops_generate/pyboost/__init__.py +0 -0
  405. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
  406. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
  407. mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
  408. mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
  409. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
  410. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
  411. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
  412. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
  413. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
  414. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
  415. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
  416. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
  417. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
  418. mindspore/ops_generate/resources/__init__.py +0 -0
  419. mindspore/ops_generate/resources/resource_list.py +30 -0
  420. mindspore/ops_generate/resources/resource_loader.py +36 -0
  421. mindspore/ops_generate/resources/resource_manager.py +64 -0
  422. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  423. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  424. mindspore/parallel/__init__.py +7 -3
  425. mindspore/parallel/_auto_parallel_context.py +152 -34
  426. mindspore/parallel/_cell_wrapper.py +130 -15
  427. mindspore/parallel/_parallel_serialization.py +107 -5
  428. mindspore/parallel/_ps_context.py +1 -1
  429. mindspore/parallel/_recovery_context.py +7 -2
  430. mindspore/parallel/_tensor.py +142 -18
  431. mindspore/parallel/_utils.py +199 -23
  432. mindspore/parallel/algo_parameter_config.py +4 -4
  433. mindspore/parallel/auto_parallel.py +732 -0
  434. mindspore/parallel/checkpoint_convert.py +159 -0
  435. mindspore/parallel/checkpoint_transform.py +698 -35
  436. mindspore/parallel/cluster/process_entity/_api.py +276 -50
  437. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  438. mindspore/parallel/cluster/run.py +21 -4
  439. mindspore/parallel/function/__init__.py +24 -0
  440. mindspore/parallel/function/reshard_func.py +259 -0
  441. mindspore/parallel/nn/__init__.py +25 -0
  442. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  443. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  444. mindspore/parallel/parameter_broadcast.py +25 -14
  445. mindspore/parallel/shard.py +137 -58
  446. mindspore/parallel/transform_safetensors.py +363 -305
  447. mindspore/pgodb140.dll +0 -0
  448. mindspore/pgort140.dll +0 -0
  449. mindspore/profiler/__init__.py +22 -5
  450. mindspore/profiler/analysis/__init__.py +0 -0
  451. mindspore/profiler/analysis/parser/__init__.py +0 -0
  452. mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
  453. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  454. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  455. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  456. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  457. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  458. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
  459. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  460. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
  461. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  462. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  463. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  464. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  465. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  466. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  467. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  468. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  469. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  470. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  471. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
  472. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  473. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  474. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  475. mindspore/profiler/analysis/task_manager.py +131 -0
  476. mindspore/profiler/analysis/time_converter.py +84 -0
  477. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  478. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
  479. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  480. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
  481. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
  482. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
  483. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
  484. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  485. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  486. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
  487. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  488. mindspore/profiler/analysis/work_flow.py +73 -0
  489. mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
  490. mindspore/profiler/common/command_executor.py +90 -0
  491. mindspore/profiler/common/constant.py +186 -3
  492. mindspore/profiler/common/file_manager.py +208 -0
  493. mindspore/profiler/common/log.py +130 -0
  494. mindspore/profiler/common/msprof_cmd_tool.py +221 -0
  495. mindspore/profiler/common/path_manager.py +395 -0
  496. mindspore/profiler/common/process_bar.py +168 -0
  497. mindspore/profiler/common/process_pool.py +9 -3
  498. mindspore/profiler/common/profiler_context.py +500 -0
  499. mindspore/profiler/common/profiler_info.py +304 -0
  500. mindspore/profiler/common/profiler_meta_data.py +74 -0
  501. mindspore/profiler/common/profiler_output_path.py +284 -0
  502. mindspore/profiler/common/profiler_parameters.py +251 -0
  503. mindspore/profiler/common/profiler_path_manager.py +179 -0
  504. mindspore/profiler/common/record_function.py +76 -0
  505. mindspore/profiler/common/tlv_decoder.py +76 -0
  506. mindspore/profiler/common/util.py +75 -2
  507. mindspore/profiler/dynamic_profiler.py +341 -75
  508. mindspore/profiler/envprofiler.py +163 -0
  509. mindspore/profiler/experimental_config.py +197 -0
  510. mindspore/profiler/mstx.py +242 -0
  511. mindspore/profiler/platform/__init__.py +21 -0
  512. mindspore/profiler/platform/base_profiler.py +40 -0
  513. mindspore/profiler/platform/cpu_profiler.py +124 -0
  514. mindspore/profiler/platform/gpu_profiler.py +74 -0
  515. mindspore/profiler/platform/npu_profiler.py +335 -0
  516. mindspore/profiler/profiler.py +1073 -90
  517. mindspore/profiler/profiler_action_controller.py +187 -0
  518. mindspore/profiler/profiler_interface.py +118 -0
  519. mindspore/profiler/schedule.py +243 -0
  520. mindspore/rewrite/api/node.py +15 -13
  521. mindspore/rewrite/api/symbol_tree.py +2 -3
  522. mindspore/run_check/_check_version.py +27 -20
  523. mindspore/run_check/run_check.py +1 -1
  524. mindspore/runtime/__init__.py +37 -0
  525. mindspore/runtime/device.py +27 -0
  526. mindspore/runtime/event.py +209 -0
  527. mindspore/runtime/executor.py +177 -0
  528. mindspore/runtime/memory.py +409 -0
  529. mindspore/runtime/stream.py +460 -0
  530. mindspore/runtime/thread_bind_core.py +401 -0
  531. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  532. mindspore/swresample-4.dll +0 -0
  533. mindspore/swscale-6.dll +0 -0
  534. mindspore/tbbmalloc.dll +0 -0
  535. mindspore/tinyxml2.dll +0 -0
  536. mindspore/train/__init__.py +8 -8
  537. mindspore/train/_utils.py +88 -25
  538. mindspore/train/amp.py +9 -5
  539. mindspore/train/callback/__init__.py +2 -2
  540. mindspore/train/callback/_callback.py +2 -16
  541. mindspore/train/callback/_checkpoint.py +53 -55
  542. mindspore/train/callback/_cluster_monitor.py +14 -18
  543. mindspore/train/callback/_early_stop.py +1 -1
  544. mindspore/train/callback/_flops_collector.py +103 -68
  545. mindspore/train/callback/_history.py +8 -5
  546. mindspore/train/callback/_lambda_callback.py +2 -2
  547. mindspore/train/callback/_landscape.py +0 -3
  548. mindspore/train/callback/_loss_monitor.py +2 -1
  549. mindspore/train/callback/_on_request_exit.py +6 -5
  550. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  551. mindspore/train/callback/_summary_collector.py +52 -19
  552. mindspore/train/callback/_time_monitor.py +2 -1
  553. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
  554. mindspore/train/data_sink.py +25 -2
  555. mindspore/train/dataset_helper.py +15 -16
  556. mindspore/train/loss_scale_manager.py +8 -7
  557. mindspore/train/metrics/accuracy.py +3 -3
  558. mindspore/train/metrics/confusion_matrix.py +9 -9
  559. mindspore/train/metrics/error.py +3 -3
  560. mindspore/train/metrics/hausdorff_distance.py +4 -4
  561. mindspore/train/metrics/mean_surface_distance.py +3 -3
  562. mindspore/train/metrics/metric.py +0 -12
  563. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  564. mindspore/train/metrics/precision.py +11 -10
  565. mindspore/train/metrics/recall.py +9 -9
  566. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  567. mindspore/train/mind_ir_pb2.py +174 -46
  568. mindspore/train/model.py +184 -113
  569. mindspore/train/serialization.py +622 -978
  570. mindspore/train/summary/_summary_adapter.py +2 -2
  571. mindspore/train/summary/summary_record.py +2 -3
  572. mindspore/train/train_thor/model_thor.py +1 -1
  573. mindspore/turbojpeg.dll +0 -0
  574. mindspore/utils/__init__.py +6 -3
  575. mindspore/utils/dryrun.py +140 -0
  576. mindspore/utils/hooks.py +81 -0
  577. mindspore/utils/runtime_execution_order_check.py +550 -0
  578. mindspore/utils/utils.py +138 -4
  579. mindspore/vcmeta.dll +0 -0
  580. mindspore/vcruntime140.dll +0 -0
  581. mindspore/vcruntime140_1.dll +0 -0
  582. mindspore/version.py +1 -1
  583. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
  584. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +587 -418
  585. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
  586. mindspore/_install_custom.py +0 -43
  587. mindspore/common/_register_for_adapter.py +0 -74
  588. mindspore/common/_tensor_overload.py +0 -139
  589. mindspore/mindspore_np_dtype.dll +0 -0
  590. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  591. mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
  592. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  593. mindspore/ops_generate/gen_aclnn_implement.py +0 -263
  594. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  595. mindspore/ops_generate/gen_pyboost_func.py +0 -1052
  596. mindspore/ops_generate/gen_utils.py +0 -209
  597. mindspore/ops_generate/op_proto.py +0 -145
  598. mindspore/ops_generate/template.py +0 -261
  599. mindspore/profiler/envprofiling.py +0 -254
  600. mindspore/profiler/profiling.py +0 -1926
  601. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
  602. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
@@ -26,10 +26,10 @@ from mindspore.ops import functional as F
26
26
  from mindspore.ops.operations import nn_ops as NN_OPS
27
27
  from mindspore.ops.operations import _sequence_ops as seq
28
28
  import mindspore.common.dtype as mstype
29
- from mindspore.ops.function.math_func import logsumexp
29
+ from mindspore.ops.function.math_func import logsumexp, div
30
30
  from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
31
31
  from mindspore.common.tensor import Tensor
32
- from mindspore._c_expression import Tensor as Tensor_
32
+ from mindspore._c_expression import TensorPy as Tensor_
33
33
  from mindspore.ops._primitive_cache import _get_cache_prim
34
34
  from mindspore import _checkparam as validator
35
35
  from mindspore.ops.composite.multitype_ops._constexpr_utils import raise_value_error
@@ -40,22 +40,72 @@ from mindspore.ops.operations.nn_ops import ChannelShuffle
40
40
  from mindspore.ops.operations.nn_ops import TripletMarginLoss
41
41
  from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
42
42
  from mindspore.common.api import _function_forbid_reuse
43
- from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, relu, fast_gelu, silu, elu, sigmoid, relu6, \
44
- softmax_impl, swiglu, logsigmoid_op
45
- from mindspore.ops.auto_generate import group_norm_op, rms_norm, layer_norm_ext_op, batch_norm_ext_op, mse_loss_ext
43
+ from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, fast_gelu, silu, elu, sigmoid, relu6, \
44
+ softmax_impl, swiglu, logsigmoid_op, kl_div_op, divs_op
45
+ from mindspore.ops.auto_generate import relu_op, inplace_relu_op
46
+ from mindspore.ops.auto_generate import group_norm_op, rms_norm, add_rms_norm, layer_norm_ext_op, batch_norm_ext_op,\
47
+ mse_loss_ext
48
+ # 1
46
49
  from mindspore.ops.auto_generate import (reflection_pad_1d_op, reflection_pad_2d_op, add_layernorm_v2_op,
47
50
  reflection_pad_3d_op, # pylint: disable=W0611
48
51
  replication_pad_1d_op, replication_pad_2d_op, replication_pad_3d_op,
49
52
  constant_pad_nd_op, dropout_ext_op, reverse_v2_impl, avg_pool2d_op,
50
53
  upsample_nearest1d_op, upsample_nearest2d_op, upsample_nearest3d_op,
51
54
  upsample_linear1d_op, upsample_bilinear2d_op, upsample_bicubic2d_op,
52
- upsample_trilinear3d_impl, fill_scalar_op, floor_op)
53
- from mindspore.ops.auto_generate.gen_ops_prim import embedding_op, Convolution, ConstantPadND, MaxPoolWithIndices, \
54
- MaxPoolWithMask
55
+ upsample_trilinear3d_impl, fill_scalar_op, floor_op, nllloss_2d_op,
56
+ masked_fill_op, masked_select, ones, flatten_ext, conv_transpose2d)
57
+ # 2
58
+
59
+ # 3
60
+
61
+ # 4
62
+
63
+ # 5
64
+
65
+ # 6
66
+
67
+ # 7
68
+
69
+ # 8
70
+
71
+ # 9
72
+
73
+ # 10
74
+
75
+ # 11
76
+
77
+ # 12
78
+
79
+ # 13
80
+
81
+ # 14
82
+
83
+ # 15
84
+ from mindspore.ops.auto_generate import avg_pool3d_ext_op
85
+ # 16
86
+
87
+ # 17
88
+
89
+ # 18
90
+
91
+ # 19
92
+
93
+ # 20
94
+
95
+ from mindspore.ops.auto_generate.gen_ops_prim import embedding_op, MaxPoolWithIndices, \
96
+ PromptFlashAttention, MaxPoolWithMask
97
+ from mindspore.ops.auto_generate.gen_ops_prim import conv3d_ext_op, conv3d_padding_op, conv2d_ext_op, \
98
+ conv2d_padding_op, conv1d_ext_op, conv1d_padding_op, speed_fusion_attention_op
55
99
  from mindspore.common.generator import default_generator
56
100
  from mindspore.ops.auto_generate import hardshrink, hardsigmoid, hardswish
57
101
  from mindspore.ops.auto_generate import softshrink
102
+ from mindspore.ops.auto_generate import soft_margin_loss
103
+ from mindspore.ops.auto_generate import moe_token_permute, moe_token_unpermute
58
104
  from mindspore.ops.auto_generate import adaptive_avg_pool2d_ext_op
105
+ from mindspore.ops.auto_generate.pyboost_inner_prim import nllloss_impl
106
+ from mindspore.ops.auto_generate.pyboost_inner_prim import adaptive_max_pool2d_impl
107
+ from mindspore.ops.function.array_func import gather_ext
108
+ from mindspore.ops.operations.manually_defined import flash_attention_score, fused_infer_attention_score
59
109
 
60
110
  abs_ = P.Abs()
61
111
  add_ = P.Add()
@@ -111,7 +161,7 @@ check_int_const = validator.check_is_int
111
161
  check_non_negative_float_const = validator.check_non_negative_float
112
162
  check_string_const = constexpr(validator.check_string)
113
163
 
114
- generator_step_ = Tensor(1, mstype.int64)
164
+ generator_step_ = Tensor(12, mstype.int64)
115
165
 
116
166
 
117
167
  def adaptive_avg_pool2d(input, output_size):
@@ -153,11 +203,11 @@ def adaptive_avg_pool2d(input, output_size):
153
203
  .. math::
154
204
 
155
205
  out\_shape = \begin{cases}
156
- input\_shape[-2] + output\_size[1], & \text{if } output\_size text{ is (None, w);}\\
157
- output\_size[0] + input\_shape[-1], & \text{if } output\_size text{ is (h, None);}\\
158
- input\_shape[-2:], & \text{if } output\_size text{ is (None, None);}\\
159
- (h, h), & \text{if } output\_size text{ is h;}\\
160
- (h, w), & \text{if } output\_size text{ is (h, w)}
206
+ input\_shape[-2] + output\_size[1], & \text{if } output\_size \text{ is (None, w);}\\
207
+ output\_size[0] + input\_shape[-1], & \text{if } output\_size \text{ is (h, None);}\\
208
+ input\_shape[-2:], & \text{if } output\_size \text{ is (None, None);}\\
209
+ (h, h), & \text{if } output\_size \text{ is h;}\\
210
+ (h, w), & \text{if } output\_size \text{ is (h, w)}
161
211
  \end{cases}
162
212
 
163
213
  Raises:
@@ -247,11 +297,11 @@ def adaptive_avg_pool2d_ext(input, output_size):
247
297
  .. math::
248
298
 
249
299
  out\_shape = \begin{cases}
250
- input\_shape[-2] + output\_size[1], & \text{if } output\_size text{ is (None, w);}\\
251
- output\_size[0] + input\_shape[-1], & \text{if } output\_size text{ is (h, None);}\\
252
- input\_shape[-2:], & \text{if } output\_size text{ is (None, None);}\\
253
- (h, h), & \text{if } output\_size text{ is h;}\\
254
- (h, w), & \text{if } output\_size text{ is (h, w)}
300
+ input\_shape[-2] + output\_size[1], & \text{if } output\_size \text{ is (None, w);}\\
301
+ output\_size[0] + input\_shape[-1], & \text{if } output\_size \text{ is (h, None);}\\
302
+ input\_shape[-2:], & \text{if } output\_size \text{ is (None, None);}\\
303
+ (h, h), & \text{if } output\_size \text{ is h;}\\
304
+ (h, w), & \text{if } output\_size \text{ is (h, w)}
255
305
  \end{cases}
256
306
 
257
307
  Raises:
@@ -399,13 +449,15 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
399
449
 
400
450
  Args:
401
451
  input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
402
- kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
403
- stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
452
+ kernel_size (int, optional): The size of kernel window used to take the average value. Default: ``1`` .
453
+ stride (Union(int, tuple[int]), optional): The distance of kernel moving. `stride` can either be an int
404
454
  number or a tuple of one int number. Default: ``1`` .
405
- padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
455
+ padding (Union(int, tuple[int]), optional): The pad value to be filled. `padding` can either be an integer
406
456
  or a tuple of one integer. Default: ``0`` .
407
- ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
408
- count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
457
+ ceil_mode (bool, optional): If True, apply ceil instead of floor to compute the output shape.
458
+ Default: ``False``.
459
+ count_include_pad (bool, optional): If True, include the zero-padding in the averaging calculation.
460
+ Default: ``True`` .
409
461
 
410
462
  Returns:
411
463
  Tensor of shape :math:`(N, C_{out}, L_{out})`.
@@ -620,13 +672,13 @@ def avg_pool2d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False,
620
672
  outputs regional average in the :math:`(H_{in}, W_{in})` -dimension.
621
673
  Given kernel size :math:`(kH, kW)` and `stride` , the operation is as follows.
622
674
 
623
- .. note::
624
- On the Atlas platform, when calculating the input, the precision is degraded from float32 to float16.
625
-
626
675
  .. math::
627
676
  \text{output}(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
628
677
  \text{input}(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
629
678
 
679
+ .. note::
680
+ On the Atlas platform, when calculating the input, the precision is degraded from float32 to float16.
681
+
630
682
  Args:
631
683
  input (Tensor): Tensor of shape :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
632
684
  kernel_size (Union[int, tuple[int], list[int]]): The size of kernel used to take the average value.
@@ -779,6 +831,77 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
779
831
  return avg_pool_op(input_x)
780
832
 
781
833
 
834
+ def avg_pool3d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True,
835
+ divisor_override=None):
836
+ r"""
837
+ Applies a 3D average pooling over an input Tensor which can be regarded as a composition of
838
+ 3D input planes. Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` ,
839
+ outputs regional average in the :math:`(D_{in}, H_{in}, W_{in})` -dimension.
840
+ Given kernel size :math:`(kD, kH, kW)` and `stride` , the operation is as follows.
841
+
842
+ .. math::
843
+ \text{output}(N_i, C_j, d, h, w) = \frac{1}{kD * kH * kW} \sum_{l=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
844
+
845
+ \text{input}(N_i, C_j, stride[0] \times d + l, stride[1] \times h + m, stride[2] \times w + n)
846
+
847
+ .. warning::
848
+ This is an experimental API that is subject to change or deletion.
849
+
850
+ Note:
851
+ This interface currently does not support Atlas A2 training series products.
852
+
853
+ Args:
854
+ input (Tensor): Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
855
+ kernel_size (Union[int, tuple[int], list[int]]): The size of kernel used to take the average value.
856
+ Can be a single number or a tuple :math:`(kD, kH, kW)` .
857
+ stride (Union[int, tuple[int], list[int]], optional): The distance of kernel moving.
858
+ Can be a single number or a tuple :math:`(sD, sH, sW)` . Default: ``None``,
859
+ where its value is equal to `kernel_size`.
860
+ padding (Union[int, tuple[int], list[int]], optional): Implicit zero padding to be added on both sides.
861
+ Can be a single number or a tuple :math:`(padD, padH, padW)` . Default: ``0``.
862
+ ceil_mode (bool, optional): If True, apply ceil instead of floor to compute the output shape.
863
+ Default: ``False``.
864
+ count_include_pad (bool, optional): If True, include the zero-padding in the averaging calculation.
865
+ Default: ``True`` .
866
+ divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
867
+ otherwise size of pooling region will be used. Default: ``None``.
868
+
869
+ Returns:
870
+ Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`.
871
+
872
+ .. math::
873
+ \begin{array}{ll} \\
874
+ D_{out} = \frac{D_{in} + 2 \times padding[0] - kernel\_size[0]}{stride[0]} + 1 \\
875
+ H_{out} = \frac{H_{in} + 2 \times padding[1] - kernel\_size[1]}{stride[1]} + 1 \\
876
+ W_{out} = \frac{W_{in} + 2 \times padding[2] - kernel\_size[2]}{stride[2]} + 1
877
+ \end{array}
878
+
879
+ Raises:
880
+ TypeError: If `input` is not a Tensor.
881
+ TypeError: If `kernel_size` or `stride` is neither int nor tuple.
882
+ TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
883
+ TypeError: If `divisor_override` is not an int or None.
884
+ ValueError: If the dimension of `input` is not equal to `4` or `5`.
885
+ ValueError: If `kernel_size` or `stride` is less than 1.
886
+ ValueError: If value of `padding` is less than `0`.
887
+ ValueError: If `kernel_size`, `padding` or `stride` is a tuple whose length is not equal to `1` or `3`.
888
+
889
+ Supported Platforms:
890
+ ``Ascend``
891
+
892
+ Examples:
893
+ >>> import mindspore
894
+ >>> import numpy as np
895
+ >>> from mindspore import Tensor, ops
896
+ >>> input_x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float16)
897
+ >>> output = ops.avg_pool3d_ext(input_x, kernel_size=2, stride=1)
898
+ >>> print(output)
899
+ [[[[[ 5. 6.]]]
900
+ [[[17. 18.]]]]]
901
+ """
902
+ return avg_pool3d_ext_op(input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
903
+
904
+
782
905
  @constexpr
783
906
  def is_ascend_backend():
784
907
  """Check if the Ascend is used"""
@@ -898,7 +1021,7 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
898
1021
  \end{align}
899
1022
 
900
1023
  Note:
901
- Ascend platform only supports float16 type for input.
1024
+ In KBK mode, `output_size` does not support mutable.
902
1025
 
903
1026
  Args:
904
1027
  input (Tensor): A 3D or 4D tensor,
@@ -907,7 +1030,7 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
907
1030
  or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
908
1031
  If it is None, it means the output size is the same as the input size.
909
1032
 
910
- return_indices (bool): If `return_indices` is ``True`` , the indices of max value would be output.
1033
+ return_indices (bool, optional): If `return_indices` is ``True`` , the indices of max value would be output.
911
1034
  Default: ``False`` .
912
1035
 
913
1036
  Returns:
@@ -959,11 +1082,17 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
959
1082
  [[8. 9.]]
960
1083
  [[8. 9.]]]]
961
1084
  """
1085
+ output_size_ = None
962
1086
  _check_adaptive_max_pool2d(return_indices)
963
- _adaptive_max_pool2d = _get_cache_prim(NN_OPS.AdaptiveMaxPool2D)(output_size)
964
- out = _adaptive_max_pool2d(input)
965
- output = out if return_indices else out[0]
966
- return output
1087
+
1088
+ if isinstance(output_size, int):
1089
+ output_size_ = (output_size, output_size)
1090
+ else:
1091
+ output_size_ = tuple(-1 if val is None else val for val in output_size)
1092
+
1093
+ if return_indices:
1094
+ return adaptive_max_pool2d_impl(input, output_size_)
1095
+ return adaptive_max_pool2d_impl(input, output_size_)[0]
967
1096
 
968
1097
 
969
1098
  def adaptive_max_pool3d(input, output_size, return_indices=False):
@@ -1438,7 +1567,7 @@ def dropout(input, p=0.5, training=True, seed=None):
1438
1567
  input (Tensor): The input Tensor of shape :math:`(*, N)`, with data type of float16, float32 or float64.
1439
1568
  p (float, optional): The dropping rate, between 0 and 1, e.g. p = 0.1,
1440
1569
  means dropping out 10% of input units. Default: ``0.5`` .
1441
- training (bool): Apply dropout if is True. Default: ``True``.
1570
+ training (bool, optional): Apply dropout if it is True. Default: ``True``.
1442
1571
  seed (int, optional): Seed is used as entropy source for Random number engines generating pseudo-random numbers.
1443
1572
  Default: ``None`` , which will be treated as ``0`` .
1444
1573
 
@@ -1473,7 +1602,7 @@ def dropout(input, p=0.5, training=True, seed=None):
1473
1602
 
1474
1603
 
1475
1604
  @_function_forbid_reuse
1476
- def dropout_ext(input, p=0.5, training=True):
1605
+ def dropout_ext(input, p=0.5, training=True, inplace=False):
1477
1606
  r"""
1478
1607
  During training, randomly zeroes some of the elements of the input tensor
1479
1608
  with probability `p` from a Bernoulli distribution. It plays the role of reducing neuron correlation and
@@ -1482,10 +1611,12 @@ def dropout_ext(input, p=0.5, training=True):
1482
1611
 
1483
1612
  Args:
1484
1613
  input (Tensor): The input Tensor of shape :math:`(*, N)`.
1485
- p (float): The dropping rate of input neurons, between 0 and 1, e.g. `p` = 0.1,
1614
+ p (float, optional): The dropping rate of input neurons, between 0 and 1, e.g. `p` = 0.1,
1486
1615
  means dropping out 10% of input neurons. Default: ``0.5`` .
1487
- training (bool): Apply dropout if it is ``True`` , if it is ``False`` , the input is returned directly,
1488
- and `p` is invalid. Default: ``True``.
1616
+ training (bool, optional): Apply dropout if it is ``True`` ,
1617
+ if it is ``False`` , the input is returned directly,
1618
+ and `p` is invalid. Default: ``True`` .
1619
+ inplace (bool, optional): If set to ``True`` , will do this operation in-place. Default: ``False`` .
1489
1620
 
1490
1621
  Returns:
1491
1622
  - **output** (Tensor) - Zeroed tensor, with the same shape and data type as `input`.
@@ -1506,10 +1637,14 @@ def dropout_ext(input, p=0.5, training=True):
1506
1637
  (2, 2)
1507
1638
  """
1508
1639
  check_bool_const(training, "training", "dropout_ext")
1509
- if training is False:
1640
+ check_bool_const(inplace, "inplace", "dropout_ext")
1641
+ if not training:
1510
1642
  return input
1511
1643
  seed, offset = default_generator._step(generator_step_) # pylint: disable=protected-access
1512
1644
  out, _ = dropout_ext_op(input, p, seed, offset)
1645
+ if inplace:
1646
+ input.copy_(out)
1647
+ return input
1513
1648
  return out
1514
1649
 
1515
1650
 
@@ -1610,7 +1745,7 @@ def dropout2d(input, p=0.5, training=True):
1610
1745
  input (Tensor): A `4D` tensor with shape :math:`(N, C, H, W)`, where `N` is the batch size, `C` is the number
1611
1746
  of channels, `H` is the feature height, and `W` is the feature width. The data type must be int8,
1612
1747
  int16, int32, int64, float16, float32 or float64.
1613
- p (float): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
1748
+ p (float): The dropping probability of a channel. The range is [0.0, 1.0], e.g. `p` = 0.8,
1614
1749
  which means dropping out 80% of channels. Default: ``0.5`` .
1615
1750
  training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: ``True`` .
1616
1751
 
@@ -2022,6 +2157,75 @@ def kl_div(logits, labels, reduction='mean'):
2022
2157
  return _get_cache_prim(P.KLDivLoss)(reduction=reduction)(logits, labels)
2023
2158
 
2024
2159
 
2160
+ def kl_div_ext(input, target, reduction='mean', log_target=False):
2161
+ r"""
2162
+ Computes the Kullback-Leibler divergence between the `input` and the `target`.
2163
+
2164
+ For tensors of the same shape :math:`x` and :math:`y`,
2165
+ the updating formulas of KLDivLoss algorithm are as follows,
2166
+
2167
+ .. math::
2168
+ L(x, y) = y \cdot (\log y - x)
2169
+
2170
+ Then,
2171
+
2172
+ .. math::
2173
+ \ell(x, y) = \begin{cases}
2174
+ L(x, y), & \text{if reduction} = \text{'none';}\\
2175
+ \operatorname{mean}(L(x, y)), & \text{if reduction} = \text{'mean';}\\
2176
+ \operatorname{sum}(L(x, y)) / x.\operatorname{shape}[0], & \text{if reduction} = \text{'batchmean';}\\
2177
+ \operatorname{sum}(L(x, y)), & \text{if reduction} = \text{'sum'.}
2178
+ \end{cases}
2179
+
2180
+ where :math:`x` represents `input`, :math:`y` represents `target`, and :math:`\ell(x, y)` represents the output.
2181
+
2182
+ Note:
2183
+ The output aligns with the mathematical definition of Kullback-Leibler divergence
2184
+ only when `reduction` is set to ``'batchmean'``.
2185
+
2186
+ Args:
2187
+ input (Tensor): The input Tensor. The data type must be float16, float32 or bfloat16(only supported by Atlas A2
2188
+ training series products).
2189
+ target (Tensor): The target Tensor which has the same type as `input`. The shapes of `target` and `input`
2190
+ should be broadcastable.
2191
+ reduction (str, optional): Specifies the reduction to be applied to the output. Default: ``'mean'``.
2192
+ log_target (bool, optional): Specifies whether `target` is passed in the log space. Default: ``False``.
2193
+
2194
+ Returns:
2195
+ Tensor, has the same dtype as `input`. If `reduction` is ``'none'``, then output has the shape of the broadcast
2196
+ result of the `input` and `target`. Otherwise, it is a scalar Tensor.
2197
+
2198
+ Raises:
2199
+ TypeError: If neither `input` nor `target` is a Tensor.
2200
+ TypeError: If dtype of `input` or `target` is not float16, float32 or bfloat16.
2201
+ TypeError: If dtype of `target` is not the same as `input`.
2202
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``, ``'batchmean'``.
2203
+ ValueError: If shapes of `target` and `input` can not be broadcastable.
2204
+
2205
+ Supported Platforms:
2206
+ ``Ascend``
2207
+
2208
+ Examples:
2209
+ >>> import mindspore as ms
2210
+ >>> from mindspore import ops
2211
+ >>> import numpy as np
2212
+ >>> input = ms.Tensor(np.array([[0.5, 0.5], [0.4, 0.6]]), ms.float32)
2213
+ >>> target = ms.Tensor(np.array([[0., 1.], [1., 0.]]), ms.float32)
2214
+ >>> output = ops.kl_div_ext(input, target, reduction='mean', log_target=False)
2215
+ >>> print(output)
2216
+ -0.225
2217
+ """
2218
+ if reduction == 'batchmean':
2219
+ reduced = kl_div_op(input, target, 'sum', log_target)
2220
+ else:
2221
+ reduced = kl_div_op(input, target, reduction, log_target)
2222
+
2223
+ if reduction == 'batchmean' and input.ndim != 0:
2224
+ reduced = divs_op(reduced, input.shape[0])
2225
+
2226
+ return reduced
2227
+
2228
+
2025
2229
  @constexpr
2026
2230
  def _check_axis_in_range(axis, ndim):
2027
2231
  """Checks axes are with the bounds of ndim"""
@@ -2081,33 +2285,22 @@ def _check_input_tensor(arg_name, *tensors):
2081
2285
 
2082
2286
  def flip(input, dims):
2083
2287
  """
2084
- Reverses the order of elements in a tensor along the given axis.
2085
-
2086
- The shape of the tensor is preserved, but the elements are reordered.
2288
+ Reverses elements in a tensor along the given dims.
2087
2289
 
2088
2290
  Args:
2089
- input (Tensor): Input tensor.
2090
- dims (Union[list[int], tuple[int]]): Axis or axes along which to flip over.
2091
- Flipping is performed on all of the axes specified in the tuple,
2092
- If `dims` is a tuple of integers contains negative, it counts from the last to the first axis.
2291
+ input (Tensor): The input tensor.
2292
+ dims (Union[list[int], tuple[int]]): The dimension to flip.
2093
2293
 
2094
2294
  Returns:
2095
- Tensor, with the entries of `dims` reversed.
2096
-
2097
- Raises:
2098
- TypeError: If the input is not a tensor.
2099
- ValueError: If `dims` is None.
2100
- ValueError: If `dims` is not a list/tuple of ints.
2295
+ Tensor
2101
2296
 
2102
2297
  Supported Platforms:
2103
2298
  ``Ascend`` ``GPU`` ``CPU``
2104
2299
 
2105
2300
  Examples:
2106
2301
  >>> import mindspore
2107
- >>> from mindspore import ops
2108
- >>> import numpy as np
2109
- >>> input = mindspore.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2110
- >>> output = ops.flip(input, (0, 2))
2302
+ >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
2303
+ >>> output = mindspore.ops.flip(input, (0, 2))
2111
2304
  >>> print(output)
2112
2305
  [[[6 5]
2113
2306
  [8 7]]
@@ -2120,26 +2313,21 @@ def flip(input, dims):
2120
2313
 
2121
2314
  def flipud(input):
2122
2315
  """
2123
- Flips the elements of each column in the up/down direction, while preserving the rows of the input tensor.
2316
+ Flip the input tensor in up/down direction.
2124
2317
 
2125
2318
  Args:
2126
- input (Tensor): Input array.
2319
+ input (Tensor): The input tensor, the dimension must be at least 2.
2127
2320
 
2128
2321
  Returns:
2129
- Tensor after the flip.
2130
-
2131
- Raises:
2132
- TypeError: If the input is not a tensor.
2322
+ Tensor
2133
2323
 
2134
2324
  Supported Platforms:
2135
2325
  ``Ascend`` ``GPU`` ``CPU``
2136
2326
 
2137
2327
  Examples:
2138
- >>> import mindspore as ms
2139
- >>> from mindspore import ops
2140
- >>> import numpy as np
2141
- >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2142
- >>> output = ops.flipud(input)
2328
+ >>> import mindspore
2329
+ >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
2330
+ >>> output = mindspore.ops.flipud(input)
2143
2331
  >>> print(output)
2144
2332
  [[[5 6]
2145
2333
  [7 8]]
@@ -2151,26 +2339,21 @@ def flipud(input):
2151
2339
 
2152
2340
  def fliplr(input):
2153
2341
  """
2154
- Flips the elements of each row in the left/right direction, while preserving the columns of the input tensor.
2342
+ Flip the input tensor in left/right direction.
2155
2343
 
2156
2344
  Args:
2157
- input (Tensor): Input tensor.
2345
+ input (Tensor): The input tensor, the dimension must be at least 2.
2158
2346
 
2159
2347
  Returns:
2160
- Tensor after the flip.
2161
-
2162
- Raises:
2163
- TypeError: If the input is not a tensor.
2348
+ Tensor
2164
2349
 
2165
2350
  Supported Platforms:
2166
2351
  ``Ascend`` ``GPU`` ``CPU``
2167
2352
 
2168
2353
  Examples:
2169
- >>> import mindspore as ms
2170
- >>> from mindspore import ops
2171
- >>> import numpy as np
2172
- >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2173
- >>> output = ops.fliplr(input)
2354
+ >>> import mindspore
2355
+ >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
2356
+ >>> output = mindspore.ops.fliplr(input)
2174
2357
  >>> print(output)
2175
2358
  [[[3 4]
2176
2359
  [1 2]]
@@ -2182,29 +2365,33 @@ def fliplr(input):
2182
2365
 
2183
2366
  def is_floating_point(input):
2184
2367
  """
2185
- Judge whether the data type of `input` is a floating point data type i.e., one of mindspore.float64,
2186
- mindspore.float32, mindspore.float16.
2368
+ If the data type of the tensor is a floating point data type, return True. Otherwise return False.
2187
2369
 
2188
2370
  Args:
2189
2371
  input (Tensor): The input Tensor.
2190
2372
 
2191
2373
  Returns:
2192
- Bool. If the dtype of `input` is a floating point data type, return ``True`` . Otherwise, return ``False`` .
2374
+ Bool
2193
2375
 
2194
2376
  Supported Platforms:
2195
2377
  ``Ascend`` ``GPU`` ``CPU``
2196
2378
 
2197
2379
  Examples:
2198
- >>> import mindspore as ms
2199
- >>> from mindspore import ops
2200
- >>> from mindspore import Tensor
2201
- >>> x = ms.Tensor([1, 2, 3], ms.float32)
2202
- >>> y = ms.Tensor([1, 2, 3], ms.int64)
2203
- >>> output = ops.is_floating_point(x)
2204
- >>> output2 = ops.is_floating_point(y)
2205
- >>> print(output)
2380
+ >>> import mindspore
2381
+ >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float64)
2382
+ >>> mindspore.ops.is_floating_point(input)
2206
2383
  True
2207
- >>> print(output2)
2384
+ >>>
2385
+ >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float32)
2386
+ >>> mindspore.ops.is_floating_point(input)
2387
+ True
2388
+ >>>
2389
+ >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float16)
2390
+ >>> mindspore.ops.is_floating_point(input)
2391
+ True
2392
+ >>>
2393
+ >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.int32)
2394
+ >>> mindspore.ops.is_floating_point(input)
2208
2395
  False
2209
2396
  """
2210
2397
  return input.dtype in [mstype.float32, mstype.bfloat16, mstype.float16, mstype.float64]
@@ -2326,12 +2513,20 @@ def interpolate(input,
2326
2513
  If scale_factor is a tuple or list, its length should be the same as the number of dimensions in input
2327
2514
  after removing the first two dimensions N, C.
2328
2515
  One and only one of size and scale_factor can be set to None. Default: ``None`` .
2329
- mode (str): The sampling algorithm.
2330
- One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
2331
- 'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
2332
- knows issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
2333
-
2334
- align_corners (bool): Whether to use corner alignment for coordinate mapping. Assuming a transformation is
2516
+ mode (str, optional): The sampling algorithm. Default: ``"nearest"`` .
2517
+ One of the following sampling methods can be used:
2518
+
2519
+ - 'nearest': the nearest neighbours interpolation.
2520
+ - 'linear': Linear interpolation, 3D only.
2521
+ - 'bilinear': Bilinear interpolation, 4D only.
2522
+ - 'trilinear': Trilinear interpolation, 5D only.
2523
+ - 'bicubic': Bicubic interpolation, 4D only.
2524
+ - 'area': area interpolation.
2525
+ - 'nearest-exact': matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
2526
+ known issues with `nearest`, for 3D and 4D.
2527
+
2528
+ align_corners (bool, optional): Whether to use corner alignment for coordinate mapping.
2529
+ Assuming a transformation is
2335
2530
  applied to the input Tensor along the x-axis, the specific calculation formula is as follows:
2336
2531
 
2337
2532
  .. code-block::
@@ -2348,9 +2543,10 @@ def interpolate(input,
2348
2543
 
2349
2544
  This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``False`` .
2350
2545
  recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
2351
- If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2352
- and finally scaled using the value of `size`.
2353
- If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
2546
+
2547
+ - If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2548
+ and finally scaled using the value of `size`.
2549
+ - If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
2354
2550
 
2355
2551
  .. note::
2356
2552
  The 'nearest-exact' mode is the same as the nearest-neighbor interpolation algorithm used in
@@ -2415,8 +2611,8 @@ def interpolate(input,
2415
2611
  >>> input = Tensor([[[1, 2, 3], [4, 5, 6]]], mindspore.float32)
2416
2612
  >>> output = ops.interpolate(input, size=(6,), mode='nearest')
2417
2613
  >>> print(output)
2418
- [[[1. 1. 2. 2. 3. 3.]
2419
- [4. 4. 5. 5. 6. 6.]]]
2614
+ [[[1. 1. 2. 2. 3. 3.]
2615
+ [4. 4. 5. 5. 6. 6.]]]
2420
2616
  """
2421
2617
 
2422
2618
  def run_nearest(x, size, align_corners=None, scale_factor=None):
@@ -2667,7 +2863,7 @@ def interpolate_ext(input,
2667
2863
  r"""
2668
2864
  Samples the input Tensor to the given size or scale_factor by using one of the interpolate algorithms.
2669
2865
 
2670
- .. warnings:
2866
+ .. warning::
2671
2867
  This is an experimental API that is subject to change or deletion.
2672
2868
 
2673
2869
  .. note::
@@ -2675,7 +2871,7 @@ def interpolate_ext(input,
2675
2871
  is not supported.
2676
2872
  - In 'nearest' mode, there may exist precision problem in the scenarios, where input is 3-D/4-D Tensor
2677
2873
  and the image is scaled by scale_factor.
2678
- - `mode` and `scale_factor` should be constants.
2874
+ - `mode` and `recompute_scale_factor` should be constants.
2679
2875
 
2680
2876
  Args:
2681
2877
  input (Tensor): Tensor to be resized.
@@ -2690,9 +2886,11 @@ def interpolate_ext(input,
2690
2886
  after removing the first two dimensions N, C.
2691
2887
  One and only one of size and scale_factor can be set to None. Default: ``None`` .
2692
2888
  mode (str): The sampling algorithm.
2693
- One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), and 'bicubic' (4D only).
2889
+ One of 'nearest', 'linear' (3D only),
2890
+ 'bilinear' (4D only), 'trilinear' (5D only), and 'bicubic' (4D only).
2694
2891
  Default: ``"nearest"`` .
2695
- align_corners (bool): Whether to use corner alignment for coordinate mapping. Assuming a transformation is
2892
+ align_corners (bool, optional): Whether to use corner alignment for coordinate mapping.
2893
+ Assuming a transformation is
2696
2894
  applied to the input Tensor along the x-axis, the specific calculation formula is as follows:
2697
2895
 
2698
2896
  .. code-block::
@@ -2707,7 +2905,7 @@ def interpolate_ext(input,
2707
2905
  the corresponding coordinate of the original
2708
2906
  data along the x-axis.
2709
2907
 
2710
- This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``False`` .
2908
+ This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``None`` .
2711
2909
  recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
2712
2910
  If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2713
2911
  and finally scaled using the value of `size`.
@@ -2740,20 +2938,6 @@ def interpolate_ext(input,
2740
2938
  Returns:
2741
2939
  Tensor, sampled, whose dimensions and dtype are the same as `input`.
2742
2940
 
2743
- Shape:
2744
- - Input: :math:`(N, C, W_{in})`, :math:`(N, C, H_{in}, W_{in})` or :math:`(N, C, D_{in}, H_{in}, W_{in})`
2745
- - Output: :math:`(N, C, W_{out})`, :math:`(N, C, H_{out}, W_{out})`
2746
- or :math:`(N, C, D_{out}, H_{out}, W_{out})`, where
2747
-
2748
- .. math::
2749
- D_{out} = \left\lfloor D_{in} \times \text{scale\_factor} \right\rfloor
2750
-
2751
- .. math::
2752
- H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor
2753
-
2754
- .. math::
2755
- W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor
2756
-
2757
2941
  Raises:
2758
2942
  TypeError: `input` is not a Tensor.
2759
2943
  ValueError: Both `size` and `scale_factor` are not empty.
@@ -2771,12 +2955,12 @@ def interpolate_ext(input,
2771
2955
 
2772
2956
  Examples:
2773
2957
  >>> import mindspore
2774
- >>> from mindspore import Tensor, mint
2958
+ >>> from mindspore import Tensor, ops
2775
2959
  >>> input = Tensor([[[1, 2, 3], [4, 5, 6]]], mindspore.float32)
2776
- >>> output = mint.interpolate(input, size=(6,), mode='nearest')
2960
+ >>> output = ops.interpolate_ext(input, size=(6,), mode='nearest')
2777
2961
  >>> print(output)
2778
- [[[1. 1. 2. 2. 3. 3.]
2779
- [4. 4. 5. 5. 6. 6.]]]
2962
+ [[[1. 1. 2. 2. 3. 3.]
2963
+ [4. 4. 5. 5. 6. 6.]]]
2780
2964
  """
2781
2965
 
2782
2966
  def run_nearest(x, size, align_corners=None, scale_factor=None):
@@ -2914,58 +3098,6 @@ def softsign(x):
2914
3098
  return softsign_(x)
2915
3099
 
2916
3100
 
2917
- def soft_margin_loss(input, target, reduction='mean'):
2918
- r"""
2919
- Calculate the soft margin loss of input and target.
2920
-
2921
- Creates a criterion that optimizes a two-class classification
2922
- logistic loss between input tensor :math:`x` and target tensor :math:`y`
2923
- (containing 1 or -1).
2924
-
2925
- .. math::
2926
- \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
2927
-
2928
- where :math:`x.nelement()` is the number of elements of :math:`x`.
2929
-
2930
- .. warning::
2931
- This is an experimental API that is subject to change or deletion.
2932
-
2933
- Args:
2934
- input (Tensor): Predict data. Data type must be float16 or float32.
2935
- target (Tensor): Ground truth data, with the same type and shape as `input`.
2936
- reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2937
- ``'sum'`` . Default: ``'mean'`` .
2938
-
2939
- - ``'none'``: no reduction will be applied.
2940
- - ``'mean'``: compute and return the mean of elements in the output.
2941
- - ``'sum'``: the output elements will be summed.
2942
-
2943
- Outputs:
2944
- Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `input`.
2945
- Otherwise, a scalar value will be returned.
2946
-
2947
- Raises:
2948
- TypeError: If `input` or `target` is not a Tensor.
2949
- TypeError: If dtype of `input` or `target` is neither float16 nor float32.
2950
- ValueError: If shape of `input` is not the same as that of `target`.
2951
- ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
2952
-
2953
- Supported Platforms:
2954
- ``Ascend`` ``GPU``
2955
-
2956
- Examples:
2957
- >>> import mindspore
2958
- >>> import numpy as np
2959
- >>> from mindspore import Tensor, ops
2960
- >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
2961
- >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
2962
- >>> output = ops.soft_margin_loss(logits, labels)
2963
- >>> print(output)
2964
- 0.6764238
2965
- """
2966
- soft_margin_loss_op = _get_cache_prim(P.SoftMarginLoss)(reduction=reduction)
2967
- output = soft_margin_loss_op(input, target)
2968
- return output
2969
3101
 
2970
3102
 
2971
3103
  def softmax(input, axis=-1, *, dtype=None):
@@ -3030,8 +3162,6 @@ def softmax_ext(input, dim=None, dtype=None):
3030
3162
  input (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3031
3163
  additional dimensions.
3032
3164
  dim (int, optional): The dim to perform the Softmax operation. Default: ``None`` .
3033
-
3034
- Keyword Args:
3035
3165
  dtype (:class:`mindspore.dtype`, optional): When set, `input` will be converted to the specified type,
3036
3166
  `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
3037
3167
 
@@ -3264,7 +3394,7 @@ def selu(input_x):
3264
3394
 
3265
3395
  def logsigmoid(x):
3266
3396
  r"""
3267
- Applies logsigmoid activation element-wise. The input is a Tensor with any valid shape.
3397
+ Applies LogSigmoid activation element-wise. The input is a Tensor with any valid shape.
3268
3398
 
3269
3399
  Logsigmoid is defined as:
3270
3400
 
@@ -3631,7 +3761,7 @@ def _replication_pad(input, pad):
3631
3761
  return out
3632
3762
 
3633
3763
 
3634
- def pad_ext(input, pad, mode='constant', value=0.0):
3764
+ def pad_ext(input, pad, mode='constant', value=None):
3635
3765
  r"""
3636
3766
  Pads the input tensor according to the pad.
3637
3767
 
@@ -3679,7 +3809,7 @@ def pad_ext(input, pad, mode='constant', value=0.0):
3679
3809
 
3680
3810
  value (Union[int, float, None], optional): Valid only in ``'constant'`` mode.
3681
3811
  Set the padding value in ``'constant'`` mode. If the value is None, 0 is used as the default padding value.
3682
- Default: ``0.0`` .
3812
+ Default: ``None`` .
3683
3813
 
3684
3814
  Returns:
3685
3815
  Tensor, the tensor after padding.
@@ -3689,7 +3819,7 @@ def pad_ext(input, pad, mode='constant', value=0.0):
3689
3819
  TypeError: If `input` is not a Tensor.
3690
3820
  ValueError: If length of `pad` is not even.
3691
3821
  ValueError: If length of `pad` is greater than 6.
3692
- ValueError: If `mode` is not ``'constant'`` and `value` not ``None``.
3822
+ ValueError: If `mode` is not ``'constant'`` and `value` is neither ``None`` nor 0.
3693
3823
 
3694
3824
  Supported Platforms:
3695
3825
  ``Ascend``
@@ -3717,7 +3847,7 @@ def pad_ext(input, pad, mode='constant', value=0.0):
3717
3847
  value = 0 if value is None else value
3718
3848
  out = constant_pad_nd_op(input, pad, value)
3719
3849
  else:
3720
- if value != 0.0:
3850
+ if value is not None and value != 0:
3721
3851
  raise ValueError(f"Padding mode {mode} doesn\'t take in value argument.")
3722
3852
  if mode == "circular":
3723
3853
  out = _circular_pad(input, pad)
@@ -3897,9 +4027,11 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3897
4027
  `Empirical Evaluation of Rectified Activations in Convolution Network <https://arxiv.org/pdf/1505.00853.pdf>`_ .
3898
4028
 
3899
4029
  Args:
3900
- input (Tensor): The input of rrelu is a Tensor of any dimension.
3901
- lower (Union[int, float]): Slope of the activation function at x < 0. Default: ``1.0 / 8`` .
3902
- upper (Union[int, float]): Slope of the activation function at x < 0. Default: ``1.0 / 3`` .
4030
+ input (Tensor): The input of rrelu is a Tensor of any dimension.
4031
+ lower (Union[int, float]): Lower bound of the slope of the activation function when the data of `input` is less than 0.
4032
+ Default: ``1.0 / 8`` .
4033
+ upper (Union[int, float]): Upper bound of the slope of the activation function when the data of `input` is less than 0.
4034
+ Default: ``1.0 / 3`` .
3903
4035
 
3904
4036
  Returns:
3905
4037
  Tensor, after rrelu, has the same type and shape as the `input`.
@@ -4161,7 +4293,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
4161
4293
  N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
4162
4294
  classes.
4163
4295
 
4164
- If `reduction` is not ``None`` (default ``'mean'``), then
4296
+ If `reduction` is not ``'none'`` (default ``'mean'``), then
4165
4297
 
4166
4298
  .. math::
4167
4299
 
@@ -4271,67 +4403,364 @@ def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, red
4271
4403
  return loss
4272
4404
 
4273
4405
 
4274
- def l1_loss(input, target, reduction='mean'):
4406
+ def nll_loss_ext(input, target, weight=None, ignore_index=-100, reduction='mean'):
4275
4407
  r"""
4276
- Calculate the mean absolute error between the `input` value and the `target` value.
4277
-
4278
- Assuming that the :math:`x` and :math:`y` (predicted and target value) are 1-D Tensor,
4279
- length :math:`N`, `reduction` is set to ``'none'``, then calculate the loss of
4280
- :math:`x` and :math:`y` without dimensionality reduction.
4408
+ Gets the negative log likelihood loss between input and target.
4281
4409
 
4282
- The formula is as follows:
4410
+ The nll loss with reduction=none can be described as:
4283
4411
 
4284
4412
  .. math::
4285
- \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with } l_n = \left| x_n - y_n \right|,
4286
4413
 
4287
- where :math:`N` is the batch size.
4414
+ \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
4415
+ \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
4416
+ \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
4417
+ \{c \not= \text{ignore_index}\},
4288
4418
 
4289
- If `reduction` is ``'mean'`` or ``'sum'`` , then:
4419
+ where :math:`x` is the input, :math:`t` is the target, :math:`w` is the weight,
4420
+ :math:`N` is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index,
4421
+ where :math:`C` is the number of classes.
4422
+
4423
+ If `reduction` is not ``'none'`` (default ``'mean'``), then
4290
4424
 
4291
4425
  .. math::
4292
- \ell(x, y) =
4293
- \begin{cases}
4294
- \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
4295
- \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
4296
- \end{cases}
4426
+
4427
+ \ell(x, t)=\left\{\begin{array}{ll}
4428
+ \sum_{n=1}^{N} \frac{1}{\sum_{n=1}^{N} w_{t n}} l_{n}, & \text { if reduction }=\text { 'mean', } \\
4429
+ \sum_{n=1}^{N} l_{n}, & \text { if reduction }=\text { 'sum' }
4430
+ \end{array}\right.
4431
+
4432
+ .. warning::
4433
+ This is an experimental API that is subject to change or deletion.
4297
4434
 
4298
4435
  Args:
4299
- input (Tensor): Predicted value, Tensor of any dimension.
4300
- target (Tensor): Target value, usually has the same shape as the `input`.
4301
- If `input` and `target` have different shape, make sure they can broadcast to each other.
4436
+ input (Tensor): :math:`(C)` or :math:`(N, C)` where `C = number of classes` , `N = batch size` ,
4437
+ or :math:`(N, C, d_1, d_2, ..., d_K)` (for high-dimensional data).
4438
+ `input` is expected to be log-probabilities.
4439
+ Data type only supports float32 or float16 or bfloat16(only supported by
4440
+ Atlas A2 training series products).
4441
+ target (Tensor): :math:`()` or :math:`(N)` ,
4442
+ where the value range is :math:`[0, C-1]`, or :math:`(N, d_1, d_2, ..., d_K)` for
4443
+ high-dimensional loss, data type must be int32 or int64 or uint8.
4444
+ weight (Tensor, optional): A rescaling weight applied to the loss of each class.
4445
+ If not None, the shape is :math:`(C,)`.
4446
+ The data type must be float16 or float32 or bfloat16(only supported by Atlas A2 training series products).
4447
+ It should have the same data type as `input` . Default: ``None`` .
4448
+ ignore_index (int, optional): Specifies a target value that is ignored
4449
+ and does not contribute to the input gradient. Default: ``-100`` .
4302
4450
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4303
4451
  ``'sum'`` . Default: ``'mean'`` .
4304
4452
 
4305
4453
  - ``'none'``: no reduction will be applied.
4306
- - ``'mean'``: compute and return the mean of elements in the output.
4454
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
4307
4455
  - ``'sum'``: the output elements will be summed.
4308
4456
 
4309
4457
  Returns:
4310
- Tensor or Scalar, if `reduction` is ``'none'``, return a Tensor with same shape and dtype as `input`.
4311
- Otherwise, a scalar value will be returned.
4312
-
4313
- Raises:
4314
- TypeError: If `input` is not a Tensor.
4315
- TypeError: If `target` is not a Tensor.
4316
- ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
4458
+ Tensor. The data type is the same as that of `input`.
4317
4459
 
4318
4460
  Supported Platforms:
4319
- ``Ascend`` ``GPU`` ``CPU``
4461
+ ``Ascend``
4320
4462
 
4321
4463
  Examples:
4322
- >>> from mindspore import Tensor, ops
4323
- >>> from mindspore import dtype as mstype
4324
- >>> x = Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
4325
- >>> target = Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
4326
- >>> output = ops.l1_loss(x, target, reduction="mean")
4327
- >>> print(output)
4328
- 3.0
4464
+ >>> import mindspore
4465
+ >>> import numpy as np
4466
+ >>> from mindspore import Tensor, mint
4467
+ >>> input = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
4468
+ >>> target = mindspore.Tensor(np.array([1, 0, 4]), mindspore.int32)
4469
+ >>> output = mint.nn.functional.nll_loss(input, target)
4470
+
4329
4471
  """
4330
- _check_is_tensor('input', input, "l1_loss")
4331
- _check_is_tensor('target', target, "l1_loss")
4332
- if reduction not in ('mean', 'sum', 'none'):
4333
- raise ValueError(f"For l1_loss, the 'reduction' must be in ['mean', 'sum', 'none'], but got {reduction}.")
4334
- loss = abs_(input - target)
4472
+ return _nllloss_nd(input, target, weight, ignore_index, reduction)
4473
+
4474
+
4475
+ def _nllloss_nd(input, target, weight=None, ignore_index=-100, reduction='mean'):
4476
+ """nllloss_nd inner function"""
4477
+ input_dim = input.ndim
4478
+ class_dim = 0 if input_dim == 1 else 1
4479
+ n_classes = input.shape[class_dim]
4480
+ if weight is None:
4481
+ weight = ones(n_classes, input.dtype)
4482
+ if input_dim < 1:
4483
+ raise ValueError(f"input dim should be less than 1, but got {input_dim}")
4484
+ if input_dim != 1 and input.shape[0] != target.shape[0]:
4485
+ raise ValueError(f"input bacth_size should be equal to target batch_size, but got {input.shape[0]} and "
4486
+ f"{target.shape[0]}")
4487
+ if input_dim == 1 or input_dim == 2:
4488
+ return nllloss_impl(input, target, weight, reduction, ignore_index)[0]
4489
+ if input_dim == 4:
4490
+ return nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
4491
+ # input_dim==3 or input_dim>4
4492
+ n = input.shape[0]
4493
+ c = input.shape[1]
4494
+ out_size = (n,) + input.shape[2:]
4495
+ if input.size > 0:
4496
+ input = input.view((n, c, 1, -1))
4497
+ else:
4498
+ input = input.view((n, c, 0, 0))
4499
+ if target.size > 0:
4500
+ target = target.view((n, 1, -1))
4501
+ else:
4502
+ target = target.view((n, 0, 0))
4503
+ if reduction != 'none':
4504
+ return nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
4505
+ ret = nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
4506
+ return ret.view(out_size)
4507
+
4508
+
4509
+ def _cross_entropy_for_probabilities(input, target, weight, reduction, label_smoothing, class_dim, n_classes):
4510
+ """cross_entropy inner function for class probabilities"""
4511
+ if input.shape != target.shape:
4512
+ raise ValueError("For cross_entropy that target is probabilities, input shape should equal to target shape.")
4513
+ if label_smoothing > 0.0:
4514
+ target = target * (1 - label_smoothing) + label_smoothing / n_classes
4515
+ loss = input * target
4516
+ if weight is not None:
4517
+ weight_ = weight
4518
+ ori_shape = loss.shape
4519
+ if input.ndim > 2:
4520
+ loss = loss.view(ori_shape[:2] + (-1,))
4521
+ weight_ = weight_.view(1, -1, 1)
4522
+ loss = loss * weight_
4523
+ loss = loss.view(ori_shape)
4524
+ if reduction == "mean":
4525
+ return -div(loss.sum(), (input.size / n_classes))
4526
+ if reduction == "sum":
4527
+ return -loss.sum()
4528
+ if reduction == "none":
4529
+ return -loss.sum(class_dim)
4530
+ raise ValueError(f"redution value {reduction} not valid.")
4531
+
4532
+
4533
+ def _cross_entropy_for_class_indices(input, target, weight, ignore_index, reduction, label_smoothing, class_dim,
4534
+ n_classes):
4535
+ """cross_entropy inner function for class indices"""
4536
+ nllloss = _nllloss_nd(input, target, weight, ignore_index, reduction)
4537
+ if label_smoothing > 0.0:
4538
+ if weight is not None:
4539
+ weight_ = weight
4540
+ input_ = input
4541
+ ori_shape = input.shape
4542
+ if input.ndim > 2:
4543
+ input_ = input.view(ori_shape[:2] + (-1,))
4544
+ weight_ = weight_.view(1, -1, 1)
4545
+ loss = input_ * weight_
4546
+ loss = loss.view(ori_shape)
4547
+ smooth_loss = -loss.sum(class_dim)
4548
+ else:
4549
+ smooth_loss = -input.sum(class_dim)
4550
+ ignore_mask = ops.eq(target, ignore_index)
4551
+ smooth_loss = masked_fill_op(smooth_loss, ignore_mask, 0)
4552
+ if reduction == "mean":
4553
+ true_mask = ~ignore_mask
4554
+ if weight is not None:
4555
+ weight_sum = gather_ext(weight, 0, flatten_ext(masked_select(target, true_mask))).sum()
4556
+ if weight_sum == 0:
4557
+ ret = smooth_loss.sum()
4558
+ else:
4559
+ ret = smooth_loss.sum() / weight_sum
4560
+ else:
4561
+ weight_sum = true_mask.sum()
4562
+ if weight_sum == 0:
4563
+ ret = smooth_loss.sum()
4564
+ else:
4565
+ ret = smooth_loss.sum() / weight_sum
4566
+ elif reduction == "sum":
4567
+ ret = smooth_loss.sum()
4568
+ elif reduction == "none":
4569
+ ret = smooth_loss
4570
+ else:
4571
+ raise ValueError(f"redution value {reduction} not valid.")
4572
+ return (1 - label_smoothing) * nllloss + ret * (label_smoothing / n_classes)
4573
+ return nllloss
4574
+
4575
+
4576
+ def cross_entropy_ext(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
4577
+ r"""
4578
+ The cross entropy loss between input and target.
4579
+
4580
+ The cross entropy supports two kinds of targets:
4581
+
4582
+ - Class indices (int) in the range :math:`[0, C)` where :math:`C` is the number of classes,
4583
+ the loss with reduction=none can be described as:
4584
+
4585
+ .. math::
4586
+
4587
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
4588
+ l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
4589
+ \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
4590
+
4591
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, :math:`N` is the batch size,
4592
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
4593
+
4594
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
4595
+
4596
+ .. math::
4597
+
4598
+ \ell(x, y) = \begin{cases}
4599
+ \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
4600
+ \text{if reduction} = \text{'mean',}\\
4601
+ \sum_{n=1}^N l_n, &
4602
+ \text{if reduction} = \text{'sum'.}
4603
+ \end{cases}
4604
+
4605
+ - Probabilities (float) for each class, useful when labels beyond a single class per minibatch item
4606
+ are required, the loss with reduction=none can be described as:
4607
+
4608
+ .. math::
4609
+
4610
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
4611
+ l_n = - \sum_{c=1}^C w_c \log \frac{\exp(x_{n,c})}{\sum_{i=1}^C \exp(x_{n,i})} y_{n,c}
4612
+
4613
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
4614
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
4615
+
4616
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
4617
+
4618
+ .. math::
4619
+
4620
+ \ell(x, y) = \begin{cases}
4621
+ \frac{\sum_{n=1}^N l_n}{N}, &
4622
+ \text{if reduction} = \text{'mean',}\\
4623
+ \sum_{n=1}^N l_n, &
4624
+ \text{if reduction} = \text{'sum'.}
4625
+ \end{cases}
4626
+
4627
+ .. warning::
4628
+ This is an experimental API that is subject to change or deletion.
4629
+
4630
+ Note:
4631
+ Dynamic shape, dynamic rank and variable constant input are not supported in `strict graph mode
4632
+ (jit_syntax_level=mindspore.STRICT)
4633
+ <https://www.mindspore.cn/tutorials/en/master/compile/static_graph.html>`_.
4634
+
4635
+ Args:
4636
+ input (Tensor): :math:`(C)` or :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
4637
+ in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
4638
+ `input` is expected to be unnormalized logits (log-softmax is applied internally), data type must be float16 or float32 or bfloat16
4639
+ (only supported by Atlas A2 training series products).
4640
+ target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
4641
+ :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32 or int64. For probabilities, tensor of shape
4642
+ :math:`(C,)` , :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32
4643
+ or bfloat16(only supported by Atlas A2 training series products).
4644
+ weight (Tensor, optional): A rescaling weight applied to the loss of each class.
4645
+ If not None, the shape is :math:`(C,)`, data type must be float16 or float32 or bfloat16(only supported by
4646
+ Atlas A2 training series products). Default: ``None`` .
4647
+ ignore_index (int, optional): Specifies a target value that is ignored and does not contribute to the input
4648
+ gradient. Only valid in class indices, please set it to a negative number in probabilities.
4649
+ Default: ``-100`` .
4650
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4651
+ ``'sum'`` . Default: ``'mean'`` .
4652
+
4653
+ - ``'none'``: no reduction will be applied.
4654
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
4655
+ - ``'sum'``: the output elements will be summed.
4656
+
4657
+ label_smoothing (float, optional): Label smoothing values, a regularization tool used to prevent the model
4658
+ from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: ``0.0`` .
4659
+
4660
+ Returns:
4661
+ Tensor, the data type is the same as `input` .
4662
+
4663
+ Supported Platforms:
4664
+ ``Ascend``
4665
+
4666
+ Examples:
4667
+ >>> import mindspore as ms
4668
+ >>> from mindspore import ops, Tensor
4669
+ >>> import numpy as np
4670
+ >>> # Case 1: Indices labels
4671
+ >>> inputs = Tensor(np.random.randn(3, 5), ms.float32)
4672
+ >>> target = Tensor(np.array([1, 0, 4]), ms.int32)
4673
+ >>> output = ops.cross_entropy_ext(inputs, target)
4674
+ >>> # Case 2: Probability labels
4675
+ >>> inputs = Tensor(np.random.randn(3, 5), ms.float32)
4676
+ >>> target = Tensor(np.random.randn(3, 5), ms.float32)
4677
+ >>> output = ops.cross_entropy_ext(inputs, target)
4678
+ """
4679
+ if not isinstance(input, Tensor) or not isinstance(target, Tensor):
4680
+ raise TypeError(
4681
+ f"For cross_entropy, input and target must be Tensor, but got input:{type(input)}, target:{type(target)}.")
4682
+ if weight is not None and not isinstance(weight, Tensor):
4683
+ raise TypeError(f"For cross_entropy, weight must be Tensor or None, but got {type(weight)}.")
4684
+ if label_smoothing < 0.0 or label_smoothing > 1.0:
4685
+ raise ValueError(f"For cross_entropy, label_smoothing must in [0, 1]")
4686
+ if input.ndim == 0 or input.shape[0] == 0:
4687
+ raise ValueError(f"For cross_entropy, input don't support 0-dim and shape[0].")
4688
+ class_dim = 0 if input.ndim == 1 else 1
4689
+ n_classes = input.shape[class_dim]
4690
+ input = log_softmax_ext(input, class_dim, dtype=input.dtype)
4691
+ # for probabilities
4692
+ target_dtype = target.dtype
4693
+ if isinstance(target_dtype, type(mstype.tensor_type)):
4694
+ target_dtype = target_dtype.element_type()
4695
+ if target_dtype in mstype.float_type:
4696
+ return _cross_entropy_for_probabilities(input, target, weight, reduction, label_smoothing, class_dim,
4697
+ n_classes)
4698
+ # for class indices
4699
+ return _cross_entropy_for_class_indices(input, target, weight, ignore_index, reduction, label_smoothing,
4700
+ class_dim, n_classes)
4701
+
4702
+
4703
+ def l1_loss(input, target, reduction='mean'):
4704
+ r"""
4705
+ Calculate the mean absolute error between the `input` value and the `target` value.
4706
+
4707
+ Assuming that the :math:`x` and :math:`y` (predicted and target value) are 1-D Tensor,
4708
+ length :math:`N`, `reduction` is set to ``'none'``, then calculate the loss of
4709
+ :math:`x` and :math:`y` without dimensionality reduction.
4710
+
4711
+ The formula is as follows:
4712
+
4713
+ .. math::
4714
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with } l_n = \left| x_n - y_n \right|,
4715
+
4716
+ where :math:`N` is the batch size.
4717
+
4718
+ If `reduction` is set to ``'mean'`` or ``'sum'`` , then:
4719
+
4720
+ .. math::
4721
+ \ell(x, y) =
4722
+ \begin{cases}
4723
+ \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
4724
+ \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
4725
+ \end{cases}
4726
+
4727
+ Args:
4728
+ input (Tensor): Predicted value, Tensor of any dimension.
4729
+ target (Tensor): Target value, usually has the same shape as the `input`.
4730
+ If `input` and `target` have different shape, make sure they can broadcast to each other.
4731
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4732
+ ``'sum'`` . Default: ``'mean'`` .
4733
+
4734
+ - ``'none'``: no reduction will be applied.
4735
+ - ``'mean'``: compute and return the mean of elements in the output.
4736
+ - ``'sum'``: the output elements will be summed.
4737
+
4738
+ Returns:
4739
+ Tensor or Scalar, if `reduction` is ``'none'``, return a Tensor with same shape and dtype as `input`.
4740
+ Otherwise, a scalar value will be returned.
4741
+
4742
+ Raises:
4743
+ TypeError: If `input` is not a Tensor.
4744
+ TypeError: If `target` is not a Tensor.
4745
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
4746
+
4747
+ Supported Platforms:
4748
+ ``Ascend`` ``GPU`` ``CPU``
4749
+
4750
+ Examples:
4751
+ >>> from mindspore import Tensor, ops
4752
+ >>> from mindspore import dtype as mstype
4753
+ >>> x = Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
4754
+ >>> target = Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
4755
+ >>> output = ops.l1_loss(x, target, reduction="mean")
4756
+ >>> print(output)
4757
+ 3.0
4758
+ """
4759
+ _check_is_tensor('input', input, "l1_loss")
4760
+ _check_is_tensor('target', target, "l1_loss")
4761
+ if reduction not in ('mean', 'sum', 'none'):
4762
+ raise ValueError(f"For l1_loss, the 'reduction' must be in ['mean', 'sum', 'none'], but got {reduction}.")
4763
+ loss = abs_(input - target)
4335
4764
  return _get_loss(loss, reduction, "l1_loss")
4336
4765
 
4337
4766
 
@@ -4348,8 +4777,8 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
4348
4777
  .. math::
4349
4778
  L_{i} =
4350
4779
  \begin{cases}
4351
- \frac{0.5 (x_i - y_i)^{2}}{\beta}, & \text{if } |x_i - y_i| < \beta \\
4352
- |x_i - y_i| - 0.5 * \beta, & \text{otherwise. }
4780
+ \frac{0.5 (x_i - y_i)^{2}}{\text{beta}}, & \text{if } |x_i - y_i| < \text{beta} \\
4781
+ |x_i - y_i| - 0.5 * \text{beta}, & \text{otherwise. }
4353
4782
  \end{cases}
4354
4783
 
4355
4784
  If `reduction` is not `none`, then:
@@ -4364,12 +4793,26 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
4364
4793
  Here :math:`\text{beta}` controls the point where the loss function changes from quadratic to linear.
4365
4794
  :math:`\text{beta}>0` , its default value is ``1.0`` . :math:`N` is the batch size.
4366
4795
 
4796
+ .. warning::
4797
+ This API has poor performance on CPU and it is recommended to run it on the Ascend/GPU.
4798
+
4367
4799
  Args:
4368
- input (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4369
- Data type is float16, float32 or float64.
4370
- target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
4371
- beta (float): A parameter used to control the point where the function will change between
4372
- L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
4800
+ input (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means,
4801
+ any number of additional dimensions. Supported dtypes:
4802
+
4803
+ - Ascend: float16, float32, bfloat16.
4804
+ - CPU/GPU: float16, float32, float64.
4805
+ target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`.
4806
+
4807
+ - CPU/Ascend: has the same shape as the `input`, `target` and `input`
4808
+ comply with the implicit type conversion rules to make the data types consistent.
4809
+ - GPU: has the same shape and dtype as the `input`.
4810
+
4811
+ beta (number, optional): A parameter used to control the point where the function will change between
4812
+ L1 to L2 loss. Default: ``1.0`` .
4813
+
4814
+ - Ascend: The value should be equal to or greater than zero.
4815
+ - CPU/GPU: The value should be greater than zero.
4373
4816
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4374
4817
  ``'sum'`` . Default: ``'none'`` .
4375
4818
 
@@ -4379,14 +4822,15 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
4379
4822
 
4380
4823
  Returns:
4381
4824
  Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `input`.
4382
- Otherwise, the shape of output tensor is :math:`(1,)`.
4825
+ Otherwise, the shape of output tensor is :math:`()`.
4383
4826
 
4384
4827
  Raises:
4385
- TypeError: If `beta` is not a float.
4386
- ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4387
- TypeError: If dtype of `input` or `target` is not one of float16, float32, float64.
4388
- ValueError: If `beta` is less than or equal to 0.
4828
+ TypeError: If `input` or `target` is not a Tensor.
4829
+ RuntimeError: If dtype of `input` or `target` is not one of float16, float32, float64, bfloat16.
4389
4830
  ValueError: If shape of `input` is not the same as `target`.
4831
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4832
+ TypeError: If `beta` is not a float, int or bool.
4833
+ RuntimeError: If `beta` is less than or equal to 0.
4390
4834
 
4391
4835
  Supported Platforms:
4392
4836
  ``Ascend`` ``GPU`` ``CPU``
@@ -4476,8 +4920,8 @@ def leaky_relu(input, alpha=0.2):
4476
4920
 
4477
4921
  Args:
4478
4922
  input (Tensor): The input of leaky_relu is a Tensor of any dimension.
4479
- alpha (Union[int, float]): Slope of the activation function when the element of `input` is less than 0.
4480
- Default: ``0.2`` .
4923
+ alpha (Union[int, float], optional): Slope of the activation function when
4924
+ the element of `input` is less than 0. Default: ``0.2`` .
4481
4925
 
4482
4926
  Returns:
4483
4927
  Tensor, has the same type and shape as the `input`.
@@ -4509,37 +4953,25 @@ def leaky_relu(input, alpha=0.2):
4509
4953
 
4510
4954
  def intopk(x1, x2, k):
4511
4955
  r"""
4512
- Determines whether the targets are in the top `k` predictions.
4956
+ Return whether the elements in second input tensor exist among the top `k` elements of the first input tensor.
4513
4957
 
4514
4958
  Args:
4515
- x1 (Tensor): A 2D Tensor defines the predictions of a batch of samples with float16 or float32
4516
- data type.
4517
- x2 (Tensor): A 1D Tensor defines the labels of a batch of samples with int32 data type. The size of `x2`
4518
- must be equal to the first dimension of `x1`. The values of `x2` can not be negative and
4519
- must be equal to or less than index of x1's second dimension.
4520
- k (int): Specifies the number of top elements to be used for computing precision along the last dimension.
4959
+ x1 (Tensor): The 2-D input tensor.
4960
+ x2 (Tensor): The 1-D input tensor, should satisfy :math:`x2.shape[0] = x1.shape[0]` .
4961
+ k (int): Top `k` elements.
4521
4962
 
4522
4963
  Returns:
4523
- Tensor has 1 dimension of type bool and the same shape with `x2`. For labeling sample `i` in `x2`,
4524
- if the label in the first `k` predictions for sample `i` is in `x1`, then the value is True, otherwise False.
4525
-
4526
- Raises:
4527
- TypeError: If `k` is not an int.
4528
- TypeError: If `x1` or `x2` is not a Tensor.
4529
- TypeError: If dtype of `x1` is neither float16 nor float32.
4964
+ A 1-D tensor whose data type is bool, with the same shape as `x2`.
4530
4965
 
4531
4966
  Supported Platforms:
4532
4967
  ``Ascend`` ``GPU`` ``CPU``
4533
4968
 
4534
4969
  Examples:
4535
4970
  >>> import mindspore
4536
- >>> import numpy as np
4537
- >>> from mindspore import Tensor, ops
4538
- >>> x1 = Tensor(np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]]), mindspore.float32)
4539
- >>> x2 = Tensor(np.array([1, 3]), mindspore.int32)
4540
- >>> output = ops.intopk(x1, x2, 3)
4541
- >>> print(output)
4542
- [ True False]
4971
+ >>> x1 = mindspore.tensor([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]], mindspore.float32)
4972
+ >>> x2 = mindspore.tensor([1, 3], mindspore.int32)
4973
+ >>> mindspore.ops.intopk(x1, x2, 3)
4974
+ Tensor(shape=[2], dtype=Bool, value= [ True, False])
4543
4975
  """
4544
4976
  _in_topk = _get_cache_prim(P.InTopK)(k)
4545
4977
  return _in_topk(x1, x2)
@@ -4961,7 +5393,7 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4961
5393
  H_{in}, W_{in})` (5-D case) and dtype of float32 or float64.
4962
5394
  grid (Tensor): flow-field with shape of :math:`(N, H_{out}, W_{out}, 2)` (4-D case) or :math:`(N, D_{out},
4963
5395
  H_{out}, W_{out}, 3)` (5-D case) and same dtype as `input`.
4964
- mode (str): An optional string specifying the interpolation method. The optional values are
5396
+ mode (str, optional): An optional string specifying the interpolation method. The optional values are
4965
5397
  ``'bilinear'``, ``'nearest'``. Default: ``'bilinear'`` . Note: `bicubic` is not supported yet. When
4966
5398
  `mode="bilinear"` and the input is 5-D, the interpolation mode used internally will actually
4967
5399
  be trilinear. However, when the input is 4-D, the interpolation mode will legitimately be bilinear.
@@ -4976,9 +5408,10 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4976
5408
  It performs bilinear interpolation in the two spatial dimensions and linear interpolation along
4977
5409
  the third dimension. It is commonly used for volume or 3D image interpolation.
4978
5410
 
4979
- padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
5411
+ padding_mode (str, optional): An optional string specifying the pad method.
5412
+ The optional values are "zeros", "border" or
4980
5413
  "reflection". Default: ``'zeros'`` .
4981
- align_corners (bool): If set to `True`, the extrema (-1 and 1) are considered as referring to
5414
+ align_corners (bool, optional): If set to `True`, the extrema (-1 and 1) are considered as referring to
4982
5415
  the center points of the input's corner pixels. If set to `False`, they are instead considered as referring
4983
5416
  to the corner points of the input's corner pixels, making the sampling more resolution agnostic. Default:
4984
5417
  ``False`` .
@@ -5389,12 +5822,15 @@ def conv3d_transpose(inputs, weight, pad_mode='valid', padding=0, stride=1, dila
5389
5822
  Args:
5390
5823
  inputs (Tensor): The gradients with respect to the output of the convolution.
5391
5824
  The shape conforms to the default.
5392
- data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`. Currently dout data type only supports float16
5393
- and float32.
5825
+ data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`.
5826
+ Supported dtypes:
5827
+
5828
+ - Ascend: float16.
5829
+ - GPU/CPU: float16, float32.
5394
5830
  weight (Tensor): Set size of kernel is :math:`(K_d, K_h, K_w)`, then the shape is
5395
5831
  :math:`(C_{in}, C_{out}//group, K_d, K_h, K_w)`. Where :math:`group` is the Args parameter,
5396
5832
  :math:`//` is the symbol for integer division.
5397
- Currently weight data type only supports float16 and float32.
5833
+ It has the same dtype as `inputs`.
5398
5834
  pad_mode (str): Specifies padding mode. The optional values are
5399
5835
  "same", "valid", "pad". Default: "valid".
5400
5836
 
@@ -5538,9 +5974,9 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5538
5974
 
5539
5975
  The shape of the convolutional kernel is given by :math:`(\text{kernel_size})`,
5540
5976
  where :math:`\text{kernel_size}` is the width of the kernel.
5541
- If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5542
- will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
5543
- where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5977
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
5978
+ will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
5979
+ where `groups` is the number of groups dividing `x`'s input channel when applying group convolution.
5544
5980
 
5545
5981
  For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5546
5982
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
@@ -5655,7 +6091,7 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5655
6091
 
5656
6092
  def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5657
6093
  r"""
5658
- Applies a 2D convolution over an input tensor. The input tenor is typically of
6094
+ Applies a 2D convolution over an input tensor. The input tensor is typically of
5659
6095
  shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
5660
6096
  channel number, :math:`H` is feature height, :math:`W` is feature width.
5661
6097
 
@@ -5690,9 +6126,9 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5690
6126
  The shape of the convolutional kernel is given by :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
5691
6127
  where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the kernel,
5692
6128
  respectively.
5693
- If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5694
- will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5695
- where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
6129
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
6130
+ will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
6131
+ where `groups` is the number of groups dividing `x`'s input channel when applying group convolution.
5696
6132
 
5697
6133
  For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5698
6134
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
@@ -5792,6 +6228,127 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5792
6228
  return output
5793
6229
 
5794
6230
 
6231
+ def conv1d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
6232
+ r"""
6233
+ Applies a 1D convolution over an input tensor. The input tensor is typically
6234
+ of shape :math:`(N, C_{in}, L_{in})`,
6235
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is sequence length.
6236
+
6237
+ The output is calculated based on formula:
6238
+
6239
+ .. math::
6240
+
6241
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
6242
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
6243
+
6244
+ where :math:`bias` is the output channel bias, :math:`ccor` is
6245
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
6246
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
6247
+
6248
+ - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
6249
+ where :math:`N` is the batch size of the input.
6250
+
6251
+ - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
6252
+ where :math:`C_{out}` is the number of
6253
+ output channels, which is also equal to the number of kernels.
6254
+
6255
+ - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
6256
+ where :math:`C_{in}` is the number of
6257
+ input channels, which is also equal to the number of channels in the convolutional kernels.
6258
+
6259
+ Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
6260
+ output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
6261
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
6262
+ channel in the :math:`i`-th batch of the input feature map.
6263
+
6264
+ The shape of the convolutional kernel is given by :math:`(\text{kernel_size})`,
6265
+ where :math:`\text{kernel_size}` is the length of the kernel.
6266
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
6267
+ will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
6268
+ where `groups` is the number of groups dividing `input`'s input channel when applying group convolution.
6269
+
6270
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
6271
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
6272
+
6273
+ .. warning::
6274
+ This is an experimental API that is subject to change or deletion.
6275
+
6276
+ Args:
6277
+ input (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})` or :math:`(C_{in}, L_{in})`.
6278
+ weight (Tensor): Tensor of shape
6279
+ :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`, then the size of kernel
6280
+ is :math:`(\text{kernel_size})`.
6281
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
6282
+ When bias is ``None`` , zeros will be used. Default: ``None`` .
6283
+ stride (Union[int, tuple[int], list[int]], optional): The movement stride of the 1D convolution kernel.
6284
+ The data type is an integer or a tuple of one integer. Default: ``1`` .
6285
+ padding (Union[int, tuple[int], list[int], str], optional): The number of padding
6286
+ on the input.
6287
+ The data type is an integer or a tuple of one integer or string {`valid`, `same`}.
6288
+ The value should be greater than or equal to 0. Default: ``0`` .
6289
+
6290
+ - ``"same"``: Pad the input around its edges so that the shape of input and output
6291
+ are the same when `stride` is set to ``1``.
6292
+ The amount of padding is calculated by the operator internally. If the amount is even, it is
6293
+ uniformly distributed around the input, if it is odd, the excess amount goes to the right side.
6294
+ If this mode is set, `stride` must be 1.
6295
+
6296
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
6297
+ possible length. Extra sequence that could not complete a full stride will
6298
+ be discarded.
6299
+
6300
+ dilation (Union[int, tuple[int], list[int]], optional): Specifies the dilation rate to use for
6301
+ dilated convolution. It can be a single int or a tuple of 1 integer.
6302
+ Assuming :math:`dilation=(d)`, the convolutional kernel samples the input with a
6303
+ spacing of :math:`d-1` elements in the length direction.
6304
+ Default: ``1`` .
6305
+ groups (int, optional): Splits filter into groups, `in_channels` and `out_channels` must be
6306
+ divisible by `groups`. If the groups is equal to `in_channels` and `out_channels`,
6307
+ this 1D convolution layer also can be called 1D depthwise convolution layer. Default: ``1`` .
6308
+
6309
+ - :math:`(C_{in} \text{ % } \text{groups} == 0)` , :math:`(C_{out} \text{ % } \text{groups} == 0)` ,
6310
+ :math:`(C_{out} >= \text{groups})` , :math:`(\text{kernel_size[1]} = C_{in} / \text{groups})` .
6311
+
6312
+ Returns:
6313
+ Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
6314
+ To see how different pad modes affect the output shape, please refer to
6315
+ :class:`mindspore.mint.nn.Conv1d` for more details.
6316
+
6317
+ Raises:
6318
+ ValueError: Args and size of the input feature map should satisfy the output formula to ensure that the size of
6319
+ the output feature map is positive; otherwise, an error will be reported.
6320
+ RuntimeError: On Ascend, due to the limitation of the L1 cache size of different NPU chip, if input size or
6321
+ kernel size is too large, it may trigger an error.
6322
+ TypeError: If `in_channels`, `out_channels` or `groups` is not an int.
6323
+ TypeError: If `kernel_size`, `stride` or `dilation` is neither an int nor a tuple.
6324
+ ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
6325
+ ValueError: If `padding` is less than 0.
6326
+ ValueError: If `padding` is ``"same"`` and `stride` is not equal to 1.
6327
+ ValueError: The input parameters do not satisfy the convolution output formula.
6328
+ ValueError: The KernelSize cannot exceed the size of the input feature map.
6329
+ ValueError: The value of padding cannot cause the calculation area to exceed the input size.
6330
+
6331
+ Supported Platforms:
6332
+ ``Ascend``
6333
+
6334
+ Examples:
6335
+ >>> import mindspore
6336
+ >>> import numpy as np
6337
+ >>> from mindspore import Tensor, ops, mint
6338
+ >>> x = Tensor(np.ones([10, 32, 32]), mindspore.float32)
6339
+ >>> weight = Tensor(np.ones([32, 32, 3]), mindspore.float32)
6340
+ >>> output = mint.nn.functional.conv1d(x, weight)
6341
+ >>> print(output.shape)
6342
+ (10, 32, 30)
6343
+ """
6344
+ if isinstance(padding, (int, tuple, list)):
6345
+ return conv1d_ext_op(input, weight, bias, stride, padding, dilation, groups)
6346
+ if isinstance(padding, str):
6347
+ return conv1d_padding_op(input, weight, bias, stride, padding, dilation, groups)
6348
+ raise TypeError(f"For conv1d, the parameter 'padding' must be a tuple/list " \
6349
+ f"or a string, but got {type(padding)}")
6350
+
6351
+
5795
6352
  def _check_stride_when_same_mode(stride):
5796
6353
  """ stride must be 1 when pad mode is same """
5797
6354
  if isinstance(stride, int):
@@ -5840,9 +6397,9 @@ def _get_pad_nd_info(pad_l, pad_r):
5840
6397
 
5841
6398
  def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
5842
6399
  r"""
5843
- Applies a 2D convolution over an input tensor. The input tenor is typically of
5844
- shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
5845
- channel number, :math:`H` is feature height, :math:`W` is feature width.
6400
+ Applies a 2D convolution over an input tensor. The input tensor is typically of
6401
+ shape :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(C_{in}, H_{in}, W_{in})`,
6402
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is feature height, :math:`W` is feature width.
5846
6403
 
5847
6404
  The output is calculated based on formula:
5848
6405
 
@@ -5855,8 +6412,6 @@ def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
5855
6412
  the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5856
6413
  :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5857
6414
 
5858
- Here are the indices' meanings:
5859
-
5860
6415
  - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
5861
6416
  where :math:`N` is the batch size of the input.
5862
6417
 
@@ -5883,55 +6438,67 @@ def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
5883
6438
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
5884
6439
  `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.
5885
6440
 
5886
- Note:
5887
- On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
5888
- That is, when `groups>1`, condition :math:`C_{in}` = :math:`C_{out}` = `groups` must be satisfied.
6441
+ .. warning::
6442
+ This is an experimental API that is subject to change or deletion.
5889
6443
 
5890
6444
  Args:
5891
- input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
6445
+ input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(C_{in}, H_{in}, W_{in})`.
5892
6446
  weight (Tensor): Tensor of shape
5893
6447
  :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
5894
6448
  is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
5895
6449
  bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5896
6450
  When bias is ``None`` , zeros will be used. Default: ``None`` .
5897
- stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
5898
- the height and width of movement are both strides, or a tuple of two int numbers that
6451
+ stride (Union[int, tuple[int], list[int]], optional): The distance of kernel moving, an int number that
6452
+ represents the height and width of movement are both strides, or a tuple of two int numbers that
5899
6453
  represent height and width of movement respectively. Default: ``1`` .
5900
- padding (Union(int, tuple[int], list[int], str), optional): Implicit paddings on both sides of the input `x`.
5901
- Can be a string, one integer or a tuple/list with 2 integers.
5902
- If `padding` is a string, the optional values are ``"same"`` , ``"valid"``.
5903
-
5904
- - same: Adopts the way of completion. The height and width of the output will be equal to
5905
- the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
5906
- left and right possiblily. Otherwise, the last extra padding will be calculated from the bottom
5907
- and the right side. If this mode is set, `padding` must be 0.
5908
-
5909
- - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
5910
- without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.
5911
-
5912
- If `padding` is one integer, the paddings of top, bottom, left and right are the same, equal to padding.
5913
- If `padding` is a tuple/list with 2 integers, the padding of top adn bottom is padding[0],
5914
- and the padding of left and right is padding[1]. Default: ``0`` .
5915
- dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of
5916
- 2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
6454
+ padding (Union[int, tuple[int], list[int], str], optional): The number of padding
6455
+ on the height and width directions of the input.
6456
+ The data type is an integer or a tuple of two integers or string {`valid`, `same`}. If `padding` is an
6457
+ integer, then `padding_{H}` and `padding_{W}` are all equal to `padding`.
6458
+ If `padding` is a tuple of 2 integers, then `padding_{H}` and `padding_{W}`
6459
+ are equal to `padding[0]` and `padding[1]` respectively.
6460
+ The value should be greater than or equal to 0. Default: ``0`` .
6461
+
6462
+ - ``"same"``: Pad the input around its edges so that the shape of input and output
6463
+ are the same when `stride` is set to ``1``.
6464
+ The amount of padding is calculated by the operator internally. If the amount is even, it is
6465
+ uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side.
6466
+ If this mode is set, `stride` must be 1.
6467
+
6468
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
6469
+ possible height and width. Extra pixels that could not complete a full stride will
6470
+ be discarded.
6471
+
6472
+ dilation (Union[int, tuple[int], list[int]], optional): Gaps between kernel elements. The data type
6473
+ is int or a tuple of 2 integers. Specifies the dilation rate to use for dilated convolution.
6474
+ If set to be :math:`k > 1`,
5917
6475
  there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
5918
6476
  be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
5919
6477
  groups (int, optional): Splits `input` into groups. Default: ``1`` .
5920
6478
 
6479
+ - :math:`(C_{in} \text{ % } \text{groups} == 0)` , :math:`(C_{out} \text{ % } \text{groups} == 0)` ,
6480
+ :math:`(C_{out} >= \text{groups})` , :math:`(\text{kernel_size[1]} = C_{in} / \text{groups})`
6481
+
5921
6482
  Returns:
5922
6483
  Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
5923
6484
  To see how different pad modes affect the output shape, please refer to
5924
- :class:`mindspore.nn.Conv2d` for more details.
5925
-
6485
+ :class:`mindspore.mint.nn.Conv2d` for more details.
5926
6486
 
5927
6487
  Raises:
5928
- TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
5929
- TypeError: `groups` is not an int.
6488
+ ValueError: Args and size of the input feature map should satisfy the output formula to ensure that the size of
6489
+ the output feature map is positive; otherwise, an error will be reported. For more details on the output
6490
+ formula, please refer to :class:`mindspore.mint.nn.Conv2d`.
6491
+ RuntimeError: On Ascend, due to the limitation of the L1 cache size of different NPU chip, if input size or
6492
+ kernel size is too large, it may trigger an error.
6493
+ TypeError: If `in_channels` , `out_channels` or `groups` is not an int.
6494
+ TypeError: If `kernel_size` , `stride` or `dilation` is neither an int nor a tuple.
5930
6495
  TypeError: If `bias` is not a Tensor.
5931
6496
  ValueError: If the shape of `bias` is not :math:`(C_{out})` .
5932
6497
  ValueError: If `stride` or `dilation` is less than 1.
5933
- ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
5934
- ValueError: If `padding` is a tuple/list whose length is not equal to 2.
6498
+ ValueError: If `padding` is ``"same"`` and `stride` is not equal to 1.
6499
+ ValueError: The input parameters do not satisfy the convolution output formula.
6500
+ ValueError: The KernelSize cannot exceed the size of the input feature map.
6501
+ ValueError: The value of padding cannot cause the calculation area to exceed the input size.
5935
6502
 
5936
6503
  Supported Platforms:
5937
6504
  ``Ascend``
@@ -5939,123 +6506,21 @@ def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
5939
6506
  Examples:
5940
6507
  >>> import mindspore
5941
6508
  >>> import numpy as np
5942
- >>> from mindspore import Tensor, ops
5943
- >>> from mindspore.ops.function.nn_func import conv2d_ext
6509
+ >>> from mindspore import Tensor, ops, mint
5944
6510
  >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
5945
6511
  >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
5946
- >>> output = conv2d_ext(x, weight)
6512
+ >>> output = mint.nn.functional.conv2d(x, weight)
5947
6513
  >>> print(output.shape)
5948
6514
  (10, 32, 30, 30)
5949
6515
  """
5950
-
5951
- def _convolution_same(input, weight, bias, dilation, groups):
5952
- """ convolution when mode is 'same' """
5953
- if isinstance(dilation, int):
5954
- dilation = (dilation,) * 2
5955
- validator.check_int(len(weight.shape), 4, validator.EQ, "weight.shape", 'conv2d')
5956
- validator.check_int(len(dilation), 2, validator.EQ, "dilation", 'conv2d')
5957
-
5958
- # Calc padding info
5959
- need_pad_nd, pad_l, pad_r = _get_pad_info(dilation, weight)
5960
- if not need_pad_nd:
5961
- conv = _get_cache_prim(Convolution)(stride, pad_l, dilation, False, (0, 0), groups)
5962
- return conv(input, weight, bias)
5963
-
5964
- # Calc pad nd info
5965
- pad_nd, pad_l = _get_pad_nd_info(pad_l, pad_r)
5966
- pad_nd_op = _get_cache_prim(ConstantPadND)()
5967
- padded_input = pad_nd_op(input, pad_nd, 0)
5968
- conv = _get_cache_prim(Convolution)(stride, pad_l, dilation, False, (0, 0), groups)
5969
- return conv(padded_input, weight, bias)
5970
-
5971
- if isinstance(padding, int):
5972
- padding = (padding,) * 2
5973
-
5974
- if isinstance(padding, (tuple, list)):
5975
- conv = _get_cache_prim(Convolution)(stride, padding, dilation, False, (0, 0), groups)
5976
- return conv(input, weight, bias)
6516
+ if isinstance(padding, (int, tuple, list)):
6517
+ return conv2d_ext_op(input, weight, bias, stride, padding, dilation, groups)
5977
6518
  if isinstance(padding, str):
5978
- if padding == 'valid':
5979
- conv = _get_cache_prim(Convolution)(stride, (0, 0), dilation, False, (0, 0), groups)
5980
- return conv(input, weight, bias)
5981
- if padding == 'same':
5982
- _check_stride_when_same_mode(stride)
5983
- return _convolution_same(input, weight, bias, dilation, groups)
5984
- raise ValueError(f"For conv2d, the parameter 'padding' must be 'same' or 'valid' when " \
5985
- f"the type of 'padding' is string.")
6519
+ return conv2d_padding_op(input, weight, bias, stride, padding, dilation, groups)
5986
6520
  raise TypeError(f"For conv2d, the parameter 'padding' must be a tuple/list " \
5987
6521
  f"or a string, but got {type(padding)}")
5988
6522
 
5989
6523
 
5990
- def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
5991
- r"""
5992
- Calculates a 2D transposed convolution, which can be regarded as Conv2d for the gradient of the input,
5993
- also called deconvolution (although it is not an actual deconvolution).
5994
-
5995
- The input is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
5996
- where :math:`N` is batch size, :math:`C_{in}` is space dimension,
5997
- :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively.
5998
-
5999
- When Conv2d and Conv2dTranspose are initialized with the same parameters, and `pad_mode` is set to 'pad',
6000
- :math:`dilation * (kernel\_size - 1) - padding` amount of zero will be paded to the height and width
6001
- directions of the input, they are inverses of each other in regard to the input and output shapes in this case.
6002
- However, when `stride` > 1, Conv2d maps multiple input shapes to the same output shape. Deconvolutional network
6003
- can refer to `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_.
6004
-
6005
- Args:
6006
- input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
6007
- weight (Tensor): Tensor of shape
6008
- :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
6009
- is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
6010
- bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
6011
- When bias is ``None`` , zeros will be used. Default: ``None`` .
6012
- stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
6013
- the height and width of movement are both strides, or a tuple of two int numbers that
6014
- represent height and width of movement respectively. Default: ``1`` .
6015
- padding (Union(int, tuple[int], list[int]), optional): Implicit paddings on both sides of the input `x`.
6016
- Can be an integer or a tuple/list with 2 integers.
6017
- output_padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the output.
6018
- The data type is an integer or a tuple of two integers. If `output_padding` is an integer,
6019
- then the bottom and right padding are all equal to `output_padding`. If `output_padding` is a tuple of
6020
- 2 integers, then the bottom and right padding is equal to `output_padding[0]`, `output_padding[1]`
6021
- respectively.
6022
- groups (int, optional): Splits `input` into groups. Default: ``1`` .
6023
- dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of
6024
- 2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
6025
- there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
6026
- be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
6027
-
6028
- Returns:
6029
- Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
6030
- To see how different pad modes affect the output shape, please refer to
6031
- :class:`mindspore.nn.Conv2dTranspose` for more details.
6032
-
6033
-
6034
- Raises:
6035
- TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
6036
- TypeError: `groups` is not an int.
6037
- TypeError: If `bias` is not a Tensor.
6038
- ValueError: If the shape of `bias` is not :math:`(C_{out})` .
6039
- ValueError: If `stride` or `dilation` is less than 1.
6040
- ValueError: If `padding` is a tuple/list whose length is not equal to 2.
6041
-
6042
- Supported Platforms:
6043
- ``Ascend``
6044
-
6045
- Examples:
6046
- >>> import mindspore
6047
- >>> import numpy as np
6048
- >>> from mindspore import Tensor, ops
6049
- >>> x = Tensor(np.ones([1, 6, 32, 32]), mindspore.float32)
6050
- >>> weight = Tensor(np.ones([6, 3, 5, 5]), mindspore.float32)
6051
- >>> output = ops.conv_transpose2d(x, weight)
6052
- >>> print(output.shape)
6053
- (1, 3, 36, 36)
6054
- """
6055
- conv = _get_cache_prim(Convolution)(stride, padding, dilation, True, output_padding, groups)
6056
- return conv(input, weight, bias)
6057
-
6058
-
6059
6524
  def hardtanh(input, min_val=-1.0, max_val=1.0):
6060
6525
  r"""
6061
6526
  Applies the hardtanh activation function element-wise. The activation function is defined as:
@@ -6742,10 +7207,10 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
6742
7207
  :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
6743
7208
  where :math:`\text{kernel_size[0]}` , :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are the depth,
6744
7209
  height and width of the kernel, respectively.
6745
- If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
6746
- will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
7210
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
7211
+ will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]},
6747
7212
  \text{kernel_size[1]}, \text{kernel_size[2]})`,
6748
- where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
7213
+ where `groups` is the number of groups dividing `x`'s input channel when applying group convolution.
6749
7214
 
6750
7215
  For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
6751
7216
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
@@ -6817,69 +7282,211 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
6817
7282
  {\text{stride[2]}} + 1} \right \rfloor \\
6818
7283
  \end{array}
6819
7284
 
6820
- `pad_mode` is ``"pad"``:
7285
+ `pad_mode` is ``"pad"``:
7286
+
7287
+ .. math::
7288
+ \begin{array}{ll} \\
7289
+ D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
7290
+ \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
7291
+ H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
7292
+ \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
7293
+ W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
7294
+ \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
7295
+ \end{array}
7296
+
7297
+ Raises:
7298
+ TypeError: If `out_channel` or `groups` is not an int.
7299
+ TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
7300
+ TypeError: If `bias` is not a Tensor.
7301
+ ValueError: If the shape of `bias` is not :math:`(C_{out})`.
7302
+ ValueError: If `stride` or `dilation` is less than 1.
7303
+ ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
7304
+ ValueError: If `padding` is a tuple or list whose length is not equal to 3.
7305
+ ValueError: If `pad_mode` is not equal to 'pad' and `pad` is greater than 0.
7306
+
7307
+ Supported Platforms:
7308
+ ``Ascend`` ``GPU``
7309
+
7310
+ Examples:
7311
+ >>> import mindspore
7312
+ >>> import numpy as np
7313
+ >>> from mindspore import Tensor, ops
7314
+ >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
7315
+ >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
7316
+ >>> output = ops.conv3d(x, weight, pad_mode="same", padding=0, stride=1, dilation=1, groups=1)
7317
+ >>> print(output.shape)
7318
+ (16, 32, 10, 32, 32)
7319
+ >>> output = ops.conv3d(x, weight, pad_mode="valid", padding=0, stride=1, dilation=1, groups=1)
7320
+ >>> print(output.shape)
7321
+ (16, 32, 7, 30, 30)
7322
+ >>> output = ops.conv3d(x, weight, pad_mode="pad", padding=(2, 1, 1), stride=1, dilation=1, groups=1)
7323
+ >>> print(output.shape)
7324
+ (16, 32, 11, 32, 32)
7325
+ """
7326
+ weight_shape = weight.shape
7327
+ out_channel = weight_shape[0]
7328
+ kernel_size = weight_shape[2:5]
7329
+ if isinstance(stride, (tuple, list)):
7330
+ _check_conv_iterable_lengths(stride, dim=3, iter_name='stride')
7331
+ if isinstance(dilation, (tuple, list)):
7332
+ _check_conv_iterable_lengths(dilation, dim=3, iter_name='dilation')
7333
+ input_shape = input.shape
7334
+ in_channel = input_shape[1]
7335
+ if not (in_channel % groups == 0 and out_channel % groups == 0):
7336
+ raise ValueError("The argument 'groups' should be divisible by 'in_channel' " \
7337
+ "and 'out_channel'")
7338
+ if isinstance(padding, (list, tuple)):
7339
+ padding = _manipulate_padding(padding, dim=3)
7340
+ conv = _get_cache_prim(P.Conv3D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCDHW")
7341
+ if bias is None:
7342
+ return conv(input, weight)
7343
+ if not isinstance(bias, Tensor):
7344
+ raise TypeError(f"For 'conv3d', the 'bias' must be a Tensor, but got {type(bias)}.")
7345
+ conv_result = conv(input, weight)
7346
+ output = bias_add(conv_result, bias)
7347
+ return output
7348
+
7349
+
7350
+ def conv3d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
7351
+ r"""
7352
+ Applies a 3D convolution over an input tensor. The input tensor is typically of
7353
+ shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` or :math:`(C_{in}, D_{in}, H_{in}, W_{in})`,
7354
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`D, H, W` are the depth,
7355
+ height and width of the feature graph, respectively.
7356
+
7357
+ The output is calculated based on formula:
7358
+
7359
+ .. math::
7360
+
7361
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
7362
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
7363
+
7364
+ where :math:`bias` is the output channel bias, :math:`ccor` is
7365
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_
7366
+ , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
7367
+
7368
+ Here are the indices' meanings:
7369
+
7370
+ - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
7371
+ where :math:`N` is the batch size of the input.
7372
+
7373
+ - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
7374
+ where :math:`C_{out}` is the number of
7375
+ output channels, which is also equal to the number of kernels.
7376
+
7377
+ - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
7378
+ where :math:`C_{in}` is the number of
7379
+ input channels, which is also equal to the number of channels in the convolutional kernels.
7380
+
7381
+ Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
7382
+ output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
7383
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
7384
+ channel in the :math:`i`-th batch of the input feature map.
7385
+
7386
+ The shape of the convolutional kernel is given by :math:`(kd, kh, kw)` where :math:`kd` , :math:`kh` and\
7387
+ :math:`kw` are the depth, height and width of the kernel, respectively.
7388
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
7389
+ will be :math:`(C_{out}, C_{in} / \text{groups}, kd, kh, kw)`,
7390
+ where `groups` is the number of groups dividing `input`'s input channel when applying group convolution.
7391
+
7392
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
7393
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
7394
+
7395
+ The following lists some of the limitations of the parameters.
7396
+
7397
+ - input -- The input to the conv3d. The input must have each dimension size within the range [1, int32_max].
7398
+ - weight -- Filters of shape :math:`(C_{out}, C_{in} / groups, kd, kh, kw)`. The value of :math:`kh`
7399
+ and :math:`kw` is in the range [1, 511]. The remaining values are in the range [1, int32_max].
7400
+ And :math:`kh*kw*k0` is less than 65536 (k0 is 16. If data type is float32, k0 is 8).
7401
+ - bias -- Bias Tensor with shape :math:`(C_{out})`. The shape must equal the first dimension of the weight.
7402
+ - stride -- The distance of kernel moving. It can be an int number or
7403
+ tuple (noted by :math:`(stride_d, stride_h, stride_w)`). stride_h and stride_w are in the range [1, 63].
7404
+ stride_d is in the range [1, 255].
7405
+ - padding -- If padding is an int number, it is in the range [0, 255].
7406
+ - dilation -- The value is in the range [1, 255].
7407
+ - groups -- The value is in the range [1, 65535].
7408
+ - :math:`C_{in} \% \text{groups} == 0 \quad \text{and} \quad C_{out} \% \text{groups} == 0` .
7409
+ - :math:`weight[1] == C_{in} / groups` .
7410
+ - :math:`H_{in} + PadUp + PadDown >= (kh - 1) * DilationH + 1` .
7411
+ - :math:`W_{in} + PadLeft + PadRight >= (kw - 1) * DilationW + 1` .
7412
+ - :math:`D_{in} + PadFront + PadBack >= (kd - 1) * DilationD + 1` .
7413
+ - :math:`H_{out} = (H_{in} + PadUp + PadDown - ((kh - 1) * DilationH + 1)) / StrideH + 1` .
7414
+ - :math:`W_{out} = (W_{in} + PadLeft + PadRight - ((kw - 1) * DilationW + 1)) / StrideW + 1` .
7415
+ - :math:`D_{out} = (D_{in} + PadFront + PadBack - ((kd - 1) * DilationD + 1)) / StrideD + 1` .
7416
+ - :math:`(D_{in}+PadFront+PadBack - ((kd-1)*DilationD+1)) \% StrideD <= PadBack` .
7417
+ - :math:`(H_{in}+PadUp+PadDown - ((kh-1)*DilationH+1)) \% StrideH <= PadDown` .
7418
+ - :math:`stride_d <= kernel_d` .
7419
+ - :math:`PadUp < kh` and :math:`PadDown < kh` . When `padding` = ``'valid'``, both PadUp and PadDown are zeros.
7420
+ When `padding` = ``'same'``, pad can be calculated by
7421
+ :math:`floor(((H_{out}-1) * strideH + (kh - 1) * DilationH + 1 - H_{in}) / 2)` for high dimension.
7422
+ It is similar way to calculate the padding for depth and width dimension. And the depth and width
7423
+ dimensions also have the same constraints.
7424
+ - :math:`((kh - 1) * DilationH - PadUp)` should be in [0, 255]. It is the same constraint for depth
7425
+ and width dimension.
7426
+ - If `padding` is ``'same'``, `stride` must be 1.
7427
+
7428
+ .. warning::
7429
+ This API does not support Atlas series products.
7430
+ This is an experimental API that is subject to change or deletion.
7431
+
7432
+ Args:
7433
+ input (Tensor): Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
7434
+ weight (Tensor): Set size of kernel is :math:`(kd, kh,
7435
+ kw)`, then the shape is :math:`(C_{out}, C_{in} / groups, kd, kh, kw)`.
7436
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
7437
+ When bias is ``None`` , zeros will be used. Default: ``None`` .
7438
+ stride (Union(int, tuple[int], list[int]), optional): The distance of kernel moving, an int
7439
+ number that represents the depth, the height and width of movement are both strides, or a
7440
+ tuple of triple int numbers that
7441
+ represent the depth, height and width of movement respectively. Default: ``1`` .
7442
+ padding (Union(int, tuple[int], list[int], str), optional): Implicit paddings on both sides of the input `x`.
7443
+ Can be a string, one integer or a tuple/list with 3 integers.
7444
+ If `padding` is a string, the optional values are ``"same"`` , ``"valid"``.
7445
+
7446
+ - same: Adopts the way of completion. The height and width of the output will be equal to
7447
+ the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
7448
+ left and right if possible. Otherwise, the last extra padding will be calculated from the bottom
7449
+ and the right side. If this mode is set, `stride` must be 1.
7450
+
7451
+ - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
7452
+ without padding. Extra pixels will be discarded.
7453
+
7454
+ If `padding` is one integer, the paddings of top, bottom, left and right are the same, equal to padding.
7455
+ If `padding` is a tuple/list with 3 integers, the padding of head, tail, top, bottom,
7456
+ left and right equal to pad[0], pad[0], pad[1], pad[1], pad[2] and pad[2] correspondingly. Default: ``0`` .
7457
+ dilation (Union[int, tuple[int], list[int]], optional): Controlling the space between the kernel points.
7458
+ Default: ``1`` .
7459
+ groups (int, optional): Splits `input` into groups. Default: ``1`` .
6821
7460
 
6822
- .. math::
6823
- \begin{array}{ll} \\
6824
- D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
6825
- \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
6826
- H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
6827
- \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
6828
- W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
6829
- \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
6830
- \end{array}
7461
+ Returns:
7462
+ Tensor, the same dtype as the `input`, with the shape :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
7463
+ or :math:`(C_{out}, D_{out}, H_{out}, W_{out})`.
6831
7464
 
6832
7465
  Raises:
6833
- TypeError: If `out_channel` or `groups` is not an int.
6834
7466
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
7467
+ TypeError: If `groups` is not an int.
6835
7468
  TypeError: If `bias` is not a Tensor.
6836
- ValueError: If the shape of `bias` is not :math:`(C_{out})`.
6837
- ValueError: If `stride` or `dilation` is less than 1.
6838
- ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
6839
- ValueError: If `padding` is a tuple or list whose length is not equal to 3.
6840
- ValueError: If `pad_mode` is not equal to 'pad' and `pad` is greater than 0.
6841
7469
 
6842
7470
  Supported Platforms:
6843
- ``Ascend`` ``GPU``
7471
+ ``Ascend``
6844
7472
 
6845
7473
  Examples:
6846
7474
  >>> import mindspore
6847
7475
  >>> import numpy as np
6848
- >>> from mindspore import Tensor, ops
6849
- >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
6850
- >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
6851
- >>> output = ops.conv3d(x, weight, pad_mode="same", padding=0, stride=1, dilation=1, groups=1)
6852
- >>> print(output.shape)
6853
- (16, 32, 10, 32, 32)
6854
- >>> output = ops.conv3d(x, weight, pad_mode="valid", padding=0, stride=1, dilation=1, groups=1)
6855
- >>> print(output.shape)
6856
- (16, 32, 7, 30, 30)
6857
- >>> output = ops.conv3d(x, weight, pad_mode="pad", padding=(2, 1, 1), stride=1, dilation=1, groups=1)
6858
- >>> print(output.shape)
6859
- (16, 32, 11, 32, 32)
7476
+ >>> from mindspore import mint
7477
+ >>> x = mindspore.Tensor(np.random.randn(12, 1, 60, 50, 8), mindspore.float16)
7478
+ >>> w = mindspore.Tensor(np.random.randn(26, 1, 2, 4, 4), mindspore.float16)
7479
+ >>> out = mint.nn.functional.conv3d(x, w)
7480
+ >>> print(out.shape)
7481
+ (12, 26, 59, 47, 5)
6860
7482
  """
6861
- weight_shape = weight.shape
6862
- out_channel = weight_shape[0]
6863
- kernel_size = weight_shape[2:5]
6864
- if isinstance(stride, (tuple, list)):
6865
- _check_conv_iterable_lengths(stride, dim=3, iter_name='stride')
6866
- if isinstance(dilation, (tuple, list)):
6867
- _check_conv_iterable_lengths(dilation, dim=3, iter_name='dilation')
6868
- input_shape = input.shape
6869
- in_channel = input_shape[1]
6870
- if not (in_channel % groups == 0 and out_channel % groups == 0):
6871
- raise ValueError("The argument 'groups' should be divisible by 'in_channel' " \
6872
- "and 'out_channel'")
6873
- if isinstance(padding, (list, tuple)):
6874
- padding = _manipulate_padding(padding, dim=3)
6875
- conv = _get_cache_prim(P.Conv3D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCDHW")
6876
- if bias is None:
6877
- return conv(input, weight)
6878
- if not isinstance(bias, Tensor):
6879
- raise TypeError(f"For 'conv3d', the 'bias' must be a Tensor, but got {type(bias)}.")
6880
- conv_result = conv(input, weight)
6881
- output = bias_add(conv_result, bias)
6882
- return output
7483
+
7484
+ if isinstance(padding, (tuple, list, int)):
7485
+ return conv3d_ext_op(input, weight, bias, stride, padding, dilation, groups)
7486
+ if isinstance(padding, str):
7487
+ return conv3d_padding_op(input, weight, bias, stride, padding, dilation, groups)
7488
+ raise TypeError(f"For conv3d, the parameter 'padding' must be a tuple/list " \
7489
+ f"or a string, but got {type(padding)}")
6883
7490
 
6884
7491
 
6885
7492
  @_primexpr
@@ -7062,6 +7669,50 @@ def glu(x, axis=-1):
7062
7669
  return x * y
7063
7670
 
7064
7671
 
7672
+ def glu_ext(input, dim=-1):
7673
+ r"""
7674
+ Computes GLU (Gated Linear Unit activation function) of the input tensor.
7675
+
7676
+ .. math::
7677
+ {GLU}(a, b)= a \otimes \sigma(b)
7678
+
7679
+ where :math:`a` is the first half of the `input` Tensor after `input` is split and :math:`b` is the second half.
7680
+
7681
+ Here :math:`\sigma` is the sigmoid function, and :math:`\otimes` is the Hadamard product.
7682
+ See `Language Modeling with Gated Convolutional Networks <https://arxiv.org/abs/1612.08083>`_.
7683
+
7684
+ Args:
7685
+ input (Tensor): Tensor to be calculated. Dtype is floating point and the shape
7686
+ is :math:`(\ast_1, N, \ast_2)` where `*` means, any number of additional dimensions. :math:`N`
7687
+ is required to be an even number, where :math:`N` is the size of `input` on the dimension
7688
+ selected by `dim`.
7689
+ dim (int, optional): The dimension to split the input `input`. The value range is `[-r, r)` where `r`
7690
+ is the number of dimensions of `input`. Default: ``-1`` , the last dimension in `input`.
7691
+
7692
+ Returns:
7693
+ Tensor, the same dtype as the input `input`. The shape is :math:`(\ast_1, M, \ast_2)` where :math:`M=N/2`.
7694
+
7695
+ Raises:
7696
+ TypeError: If `input` is not a Tensor or `dim` is not an int.
7697
+ IndexError: If the value of `dim` is out of the range of `[-r, r)`, where `r` is the number
7698
+ of dimensions of `input`.
7699
+ RuntimeError: If dtype of `input` is not supported.
7700
+ RuntimeError: If the length of `input` in the dimension selected by `dim` is not even.
7701
+
7702
+ Supported Platforms:
7703
+ ``Ascend`` ``CPU``
7704
+
7705
+ Examples:
7706
+ >>> from mindspore import Tensor, ops
7707
+ >>> input = Tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]])
7708
+ >>> output = ops.function.nn_func.glu_ext(input)
7709
+ >>> print(output)
7710
+ [[0.05744425 0.11973753]
7711
+ [0.33409387 0.41398472]]
7712
+ """
7713
+ return _get_cache_prim(P.GLU)(axis=dim)(input)
7714
+
7715
+
7065
7716
  def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean'):
7066
7717
  r"""
7067
7718
  Hinge loss for optimizing a multi-class classification.
@@ -7222,7 +7873,8 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
7222
7873
  Args:
7223
7874
  input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
7224
7875
  target (Tensor): The label target Tensor which has the same shape as `input`.
7225
- weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: ``None``.
7876
+ weight (Union[Tensor, int, float], optional): The manual rescaling weight given to each class.
7877
+ Default: ``None``.
7226
7878
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
7227
7879
  ``'sum'`` . Default: ``'mean'`` .
7228
7880
 
@@ -7291,7 +7943,8 @@ def gelu(input, approximate='none'):
7291
7943
 
7292
7944
  Args:
7293
7945
  input (Tensor): The input of the activation function GeLU, the data type is float16, float32 or float64.
7294
- approximate (str): the gelu approximation algorithm to use. Acceptable vaslues are ``'none'`` and ``'tanh'`` .
7946
+ approximate (str, optional): the gelu approximation algorithm to use.
7947
+ Acceptable values are ``'none'`` and ``'tanh'`` .
7295
7948
  Default: ``'none'`` .
7296
7949
 
7297
7950
  Returns:
@@ -7309,7 +7962,7 @@ def gelu(input, approximate='none'):
7309
7962
  >>> import mindspore
7310
7963
  >>> from mindspore import Tensor, ops
7311
7964
  >>> x = Tensor([1.0, 2.0, 3.0], mindspore.float32)
7312
- >>> result = ops.gelu(x)
7965
+ >>> result = ops.gelu(x, approximate='none')
7313
7966
  >>> print(result)
7314
7967
  [0.8413447 1.9544997 2.9959505]
7315
7968
  """
@@ -7334,33 +7987,23 @@ def gelu(input, approximate='none'):
7334
7987
  def channel_shuffle(x, groups):
7335
7988
  r"""
7336
7989
  Divide the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` groups and
7337
- rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while keeping the original tensor shapes.
7990
+ rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while retaining the original tensor
7991
+ shape in the final output.
7338
7992
 
7339
7993
  Args:
7340
- x (Tensor): Tensor to be divided, it has shape :math:`(*, C, H, W)`,
7341
- with float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 data type.
7994
+ x (Tensor): The input tensor.
7342
7995
  groups (int): Number of groups to divide channels in.
7343
7996
 
7344
7997
  Returns:
7345
- A Tensor, has the same type as the `x`, and has the shape :math:`(*, C, H, W)`.
7346
-
7347
- Raises:
7348
- TypeError: If data type of `x` is not one of the following:
7349
- float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64.
7350
- TypeError: If dim of `x` is < 4.
7351
- TypeError: If `groups` is not a positive number.
7352
- ValueError: If channel number of `x` is not divisible by `groups`.
7998
+ Tensor
7353
7999
 
7354
8000
  Supported Platforms:
7355
8001
  ``Ascend`` ``CPU``
7356
8002
 
7357
8003
  Examples:
7358
8004
  >>> import mindspore
7359
- >>> import numpy as np
7360
- >>> from mindspore import Tensor, ops
7361
- >>> group = 2
7362
- >>> x = Tensor(np.arange(1* 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16))
7363
- >>> y = mindspore.ops.channel_shuffle(x, group)
8005
+ >>> x = mindspore.tensor(mindspore.ops.arange(0, 16, dtype=mindspore.int16).reshape(1, 4, 2, 2))
8006
+ >>> y = mindspore.ops.channel_shuffle(x, groups=2)
7364
8007
  >>> print(y)
7365
8008
  [[[[ 0 1]
7366
8009
  [ 2 3]]
@@ -7550,6 +8193,96 @@ def lp_pool2d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
7550
8193
  return ((sign(out) * ops.relu(ops.abs(out))) * (kernel_size[0] * kernel_size[1])).pow(1.0 / norm_type)
7551
8194
 
7552
8195
 
8196
+ def relu(input, inplace=False):
8197
+ r"""
8198
+ Computes ReLU (Rectified Linear Unit activation function) of input tensors element-wise.
8199
+
8200
+ It returns :math:`\max(input,\ 0)` element-wise. Specially, the neurons with the negative output
8201
+ will be suppressed and the active neurons will stay the same.
8202
+
8203
+ .. math::
8204
+
8205
+ ReLU(input) = (input)^+ = \max(0, input)
8206
+
8207
+ ReLU Activation Function Graph:
8208
+
8209
+ .. image:: ../images/ReLU.png
8210
+ :align: center
8211
+
8212
+ Args:
8213
+ input (Tensor): The input Tensor.
8214
+ inplace (bool, optional): Whether to use inplace mode. Default: ``False`` .
8215
+
8216
+ Returns:
8217
+ Tensor, with the same dtype and shape as the `input`.
8218
+
8219
+ Raises:
8220
+ TypeError: If dtype of `input` is not Number type.
8221
+ TypeError: If `input` is not a Tensor.
8222
+
8223
+ Supported Platforms:
8224
+ ``Ascend`` ``GPU`` ``CPU``
8225
+
8226
+ Examples:
8227
+ >>> import mindspore
8228
+ >>> import numpy as np
8229
+ >>> from mindspore import Tensor, ops
8230
+ >>> input = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
8231
+ >>> output = ops.relu(input)
8232
+ >>> print(output)
8233
+ [[0. 4. 0.]
8234
+ [2. 0. 9.]]
8235
+ """
8236
+ if inplace:
8237
+ return inplace_relu_op(input)
8238
+ return relu_op(input)
8239
+
8240
+
8241
+ def relu_(input):
8242
+ r"""
8243
+ Computes ReLU (Rectified Linear Unit activation function) of input tensors element-wise, in place.
8244
+
8245
+ It returns :math:`\max(input,\ 0)` element-wise. Specially, the neurons with the negative output
8246
+ will be suppressed and the active neurons will stay the same.
8247
+
8248
+ .. math::
8249
+
8250
+ ReLU(input) = (input)^+ = \max(0, input)
8251
+
8252
+ ReLU Activation Function Graph:
8253
+
8254
+ .. image:: ../images/ReLU.png
8255
+ :align: center
8256
+
8257
+ .. warning::
8258
+ This is an experimental API that is subject to change or deletion.
8259
+
8260
+ Args:
8261
+ input (Tensor): The input Tensor.
8262
+
8263
+ Returns:
8264
+ Tensor, with the same dtype and shape as the `input`.
8265
+
8266
+ Raises:
8267
+ TypeError: If dtype of `input` is not Number type.
8268
+ TypeError: If `input` is not a Tensor.
8269
+
8270
+ Supported Platforms:
8271
+ ``Ascend``
8272
+
8273
+ Examples:
8274
+ >>> import mindspore
8275
+ >>> import numpy as np
8276
+ >>> from mindspore import Tensor, ops
8277
+ >>> input = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
8278
+ >>> ops.relu_(input)
8279
+ >>> print(input)
8280
+ [[0. 4. 0.]
8281
+ [2. 0. 9.]]
8282
+ """
8283
+ return inplace_relu_op(input)
8284
+
8285
+
7553
8286
  def mse_loss(input, target, reduction='mean'):
7554
8287
  r"""
7555
8288
  Calculates the mean squared error between the predicted value and the label value.
@@ -7623,35 +8356,36 @@ def mse_loss(input, target, reduction='mean'):
7623
8356
 
7624
8357
  def msort(input):
7625
8358
  r"""
7626
- Sorts the elements in Tensor in ascending order of value along its first dimension.
7627
-
7628
- ops.msort(t) is equivalent to ops.Sort(axis=0)(t)[0]. See also :class:`mindspore.ops.Sort()`.
8359
+ Return a tensor obtained by sorting the input tensor in ascending order along its first dimension.
7629
8360
 
7630
- .. Note::
7631
- The Ascend backend only supports sorting the 1D input.
8361
+ `ops.msort(input)` is equivalent to `ops.sort(input, axis=0)[0]`. See also :class:`mindspore.ops.Sort()` for more
8362
+ details.
7632
8363
 
7633
8364
  Args:
7634
- input (Tensor): The input to sort, with float16 or float32 data type.
8365
+ input (Tensor): The input tensor to sort.
7635
8366
 
7636
8367
  Returns:
7637
- A tensor whose values are the sorted values, with the same shape and data type as input.
7638
-
7639
- Raises:
7640
- TypeError: If dtype of `input` is neither float16 nor float32.
8368
+ Tensor
7641
8369
 
7642
8370
  Supported Platforms:
7643
8371
  ``Ascend`` ``GPU`` ``CPU``
7644
8372
 
7645
8373
  Examples:
7646
- >>> import mindspore as ms
7647
- >>> from mindspore import ops
7648
- >>> import numpy as np
7649
- >>> input = ms.Tensor(np.array([[8, 2, 1], [5, 9, 3], [4, 6, 7]]), ms.float16)
7650
- >>> output = ops.msort(input)
7651
- >>> print(output)
7652
- [[4. 2. 1.]
7653
- [5. 6. 3.]
7654
- [8. 9. 7.]]
8374
+ >>> import mindspore
8375
+ >>> input = mindspore.tensor([[8, 2, 1],
8376
+ ... [5, 9, 3],
8377
+ ... [4, 6, 7]])
8378
+ >>> mindspore.ops.msort(input)
8379
+ Tensor(shape=[3, 3], dtype=Int64, value=
8380
+ [[4, 2, 1],
8381
+ [5, 6, 3],
8382
+ [8, 9, 7]])
8383
+ >>> # is equivalent to `ops.sort(input, axis=0)[0]`
8384
+ >>> mindspore.ops.sort(input, axis=0)[0]
8385
+ Tensor(shape=[3, 3], dtype=Int64, value=
8386
+ [[4, 2, 1],
8387
+ [5, 6, 3],
8388
+ [8, 9, 7]])
7655
8389
  """
7656
8390
  return ops.Sort(axis=0)(input)[0]
7657
8391
 
@@ -7667,7 +8401,8 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
7667
8401
  as `anchor`.
7668
8402
  negative (Tensor): A sample belonging to the different class from `anchor`, with the same type and shape
7669
8403
  as `anchor`.
7670
- margin (float, optional): Make a margin between the positive pair and the negative pair. Default: ``1.0`` .
8404
+ margin (float, optional): Make a margin between the positive pair and the negative pair. The shape of margin
8405
+ must be 0. Default: ``1.0`` .
7671
8406
  p (int, optional): The degree of norm for pairwise distance. Default: ``2`` .
7672
8407
  eps (float, optional): Add small value to avoid division by zero. Default: ``1e-06``.
7673
8408
  swap (bool, optional): The distance swap change the negative distance to the distance between positive
@@ -8131,7 +8866,7 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
8131
8866
  return out
8132
8867
 
8133
8868
 
8134
- def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ceil_mode=False, return_indices=False):
8869
+ def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False):
8135
8870
  r"""
8136
8871
  Performs a 2D max pooling on the input Tensor.
8137
8872
 
@@ -8153,21 +8888,23 @@ def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ce
8153
8888
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
8154
8889
  value, is an int number that represents height and width of the kernel, or a tuple of
8155
8890
  two int numbers that represent height and width respectively.
8156
- stride (Union[int, tuple[int], None]): The distance of kernel moving, an int number that represents
8891
+ stride (Union[int, tuple[int], None], optional): The distance of kernel moving, an int number that represents
8157
8892
  the height and width of movement are both stride, or a tuple of two int numbers that
8158
8893
  represent height and width of movement respectively.
8159
8894
  Default: ``None`` , which indicates the moving step is `kernel_size` .
8160
- padding (Union[int, tuple[int]]): An int number that represents the height and width of movement are both
8895
+ padding (Union[int, tuple[int]], optional):
8896
+ An int number that represents the height and width of movement are both
8161
8897
  strides, or a tuple of two int numbers that represent height and width of movement respectively.
8162
8898
  Default: ``0`` .
8163
- dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: ``1`` .
8164
- ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
8165
- return_indices (bool): Whether to output the indices of max value. Default: ``False`` .
8899
+ dilation (Union[int, tuple[int]], optional): Control the stride of elements in the kernel. Default: ``1`` .
8900
+ ceil_mode (bool, optional): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
8901
+ return_indices (bool, optional): Whether to output the indices of max value. Default: ``False`` .
8166
8902
 
8167
8903
  Returns:
8168
8904
  If `return_indices` is ``False`` , return a Tensor `output`, else return a tuple (`output`, `argmax`).
8169
8905
 
8170
- - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
8906
+ - **output** (Tensor) - Maxpooling result,
8907
+ with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
8171
8908
  It has the same data type as `input`.
8172
8909
 
8173
8910
  .. math::
@@ -8198,10 +8935,9 @@ def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ce
8198
8935
  >>> import mindspore
8199
8936
  >>> import numpy as np
8200
8937
  >>> from mindspore import Tensor, ops
8201
- >>> from mindspore.ops.function.nn_func import max_pool2d_ext
8202
8938
  >>> input = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
8203
- >>> output_tensor, argmax = max_pool2d_ext(input, kernel_size=(3, 2), stride=(2, 1),
8204
- ceil_mode=False, return_indices=True)
8939
+ >>> output_tensor, argmax = ops.function.nn_func.max_pool2d_ext(input, kernel_size=(3, 2), stride=(2, 1),
8940
+ ... ceil_mode=False, return_indices=True)
8205
8941
  >>> print(output_tensor.shape)
8206
8942
  (20, 16, 24, 31)
8207
8943
  >>> print(argmax.shape)
@@ -8219,62 +8955,199 @@ def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ce
8219
8955
  return out
8220
8956
 
8221
8957
 
8222
- def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift,
8223
- deq_scale1, quant_scale1, deq_scale2, quant_scale2, quant_offset2, num_heads,
8224
- scale_value=1.0, pre_tokens=2147483547, next_tokens=0, input_layout='BSH',
8225
- num_key_value_heads=0, sparse_mode=0, inner_precise=1):
8958
+ def prompt_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=None, actual_seq_lengths_kv=None,
8959
+ pse_shift=None, deq_scale1=None, quant_scale1=None, deq_scale2=None, quant_scale2=None,
8960
+ quant_offset2=None, num_heads=1, scale_value=1.0, pre_tokens=2147483647, next_tokens=0,
8961
+ input_layout='BSH', num_key_value_heads=0, sparse_mode=0, inner_precise=1):
8226
8962
  r"""
8227
8963
  The interface for fully inference.
8228
- B -- Batch size
8229
- S -- Sequence length
8230
- H -- Hidden size
8964
+
8965
+ - B: Batch size
8966
+ - N: Num of attention heads
8967
+ - S: Sequence length
8968
+ - D: Head dim
8969
+ - H: Hidden layer size
8970
+
8971
+ Self attention constructs an attention model based on the relationship between input samples themselves. The
8972
+ principle is to assume that there is an input sample sequence :math:`x` of length :math:`n`, and each
8973
+ element of :math:`x` is a :math:`d` dimensional vector, which can be viewed as a token embedding. This sequence
8974
+ can be transformed through 3 weight matrices to obtain 3 matrices with dimensions of :math:`n\times d`.
8975
+
8976
+ The self attention calculation formula is defined as:
8977
+
8978
+ .. math::
8979
+ Attention(Q,K,V)=Softmax(\frac{QK^{T} }{\sqrt{d} } )V
8980
+
8981
+ where the product of :math:`Q` and :math:`K^{T}` represents the attention of input :math:`x`. To avoid the value
8982
+ becoming too large, it is usually scaled by dividing it by the square root of :math:`d`, and softmax
8983
+ normalization is performed on each row, yielding a matrix of :math:`n\times d` after multiplying by :math:`V`.
8984
+
8985
+ .. warning::
8986
+ - Support for the float16 dtype of `attn_mask` will be deprecated in the future.
8987
+ - When `sparse_mode` is 2, 3 or 4, the shape of `attn_mask` must be :math:`(2048, 2048)` /
8988
+ :math:`(B, 1, 2048, 2048)` / :math:`(1, 1, 2048, 2048)`.
8231
8989
 
8232
8990
  Note:
8233
- experiment ops
8991
+ - Maximum Support for each axis
8992
+
8993
+ - Supports B-axis values less than or equal to 65536 (64k).
8994
+ When the input type includes int8 with D-axis not aligned to 32, or the input type is
8995
+ float16 or bfloat16 with D-axis not aligned to 16, the B-axis supports up to 128 only.
8996
+ - Supports N-axis values less than or equal to 256.
8997
+ - Supports S-axis values less than or equal to 20971520 (20M).
8998
+ - Supports D-axis values less than or equal to 512.
8999
+
9000
+ - Quantization
9001
+
9002
+ - int8 Input, int8 Output: Parameters `deq_scale1`, `quant_scale1`, `deq_scale2`, and `quant_scale2`
9003
+ must all be provided. `quant_offset2` is optional (default is 0 if not provided).
9004
+ - int8 Input, float16 Output: Parameters `deq_scale1`, `quant_scale1`, and `deq_scale2` must all be provided.
9005
+ If `quant_offset2` or `quant_scale2` is provided (i.e., not null), it will result in an error.
9006
+ - float16 or bfloat16 Input, int8 Output: Parameter `quant_scale2` must be provided. `quant_offset2` is
9007
+ optional (default is 0 if not provided). If `deq_scale1`, `quant_scale1`, or `deq_scale2` is
9008
+ provided (i.e., not null), it will result in an error.
9009
+ - int8 Output:
9010
+
9011
+ - `quant_scale2` and `quant_offset2` in per-channel format do not support scenarios with
9012
+ left padding, Ring Attention, or non-32-byte aligned D-axis.
9013
+ - In GE mode: `quant_scale2` and `quant_offset2` in per-tensor format do not support scenarios
9014
+ with non-32-byte aligned D-axis.
9015
+ - Does not support sparse as band and `pre_tokens`/`next_tokens` being negative.
9016
+
9017
+ - `quant_scale2` and `quant_offset2` can be bfloat16 only when `query` is bfloat16.
9018
+
9019
+
9020
+ - Other Usage Caveats:
9021
+
9022
+ - :math:`N` of parameter `query` must be equal to `num_heads`. :math:`N` of parameter `key` and parameter
9023
+ `value` must be equal to `num_key_value_heads`.
9024
+ - `num_heads` must be divisible by `num_key_value_heads` and `num_heads` divided by `num_key_value_heads`
9025
+ can not be greater than 64.
9026
+ - When `query` dtype is bfloat16, D axis should align with 16.
9027
+ - Each element of `actual_seq_lengths` must not exceed q_S, and each element
9028
+ of `actual_seq_lengths_kv` must not exceed kv_S.
8234
9029
 
8235
9030
  .. warning::
8236
- This is an experimental API that is subject to change or deletion.
9031
+ Only supported on Atlas A2 training series.
9032
+
9033
+ Args:
9034
+ query (Tensor): The query tensor with data type of int8, float16 or bfloat16.
9035
+ The shape is :math:`(B, q_S, q_H)` / `(B, q_N, q_S, q_D)`.
9036
+ key (Tensor): The key tensor with the same dtype as `query`.
9037
+ The shape is :math:`(B, kv_S, kv_H)` / `(B, kv_N, kv_S, kv_D)`.
9038
+ value (Tensor): The value tensor with the same dtype as `query`.
9039
+ The shape is :math:`(B, kv_S, kv_H)` / `(B, kv_N, kv_S, kv_D)`.
9040
+ attn_mask (Tensor, optional): The attention mask tensor with data type of bool, int8, uint8 or float16.
9041
+ For each element, 0/False indicates retention and 1/True indicates discard.
9042
+ If `sparse_mode` is 0 or 1: the shape is :math:`(q_S, kv_S)` / :math:`(B, q_S, kv_S)` /
9043
+ :math:`(1, q_S, kv_S)` / :math:`(B, 1, q_S, kv_S)` / :math:`(1, 1, q_S, kv_S)`.
9044
+ If `sparse_mode` is 2, 3 or 4, the shape is :math:`(2048, 2048)` / :math:`(1, 2048, 2048)` /
9045
+ :math:`(1, 1, 2048, 2048)`.
9046
+ Default: ``None``.
9047
+ actual_seq_lengths (Union[Tensor, tuple[int], list[int]], optional): Describe actual sequence length of each
9048
+ batch of `query` with data type of int64. The shape is :math:`(B, )` and every element should be
9049
+ positive integer.
9050
+ Default: ``None``.
9051
+ actual_seq_lengths_kv (Union[Tensor, tuple[int], list[int]], optional): Describe actual sequence length of each
9052
+ batch of `key` or `value` with data type of int64. The shape is :math:`(B, )` and every element should be
9053
+ positive integer.
9054
+ Default: ``None``.
9055
+ pse_shift (Tensor, optional): The position encoding tensor with data type of float16 or bfloat16.
9056
+ Input tensor of shape :math:`(B, N, q_S, kv_S)` / :math:`(1, N, q_S, kv_S)`.
9057
+ Default: ``None``.
8237
9058
 
8238
- Inputs:
8239
- query (Tensor) - The query tensor with data type of float16 or float32.
8240
- Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
8241
- key (Tensor) - The key tensor with data type of float16 or float32.
8242
- Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
8243
- value (Tensor) - The value tensor with data type of float16 or float32.
8244
- Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
8245
- attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
8246
- For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
8247
- actual_seq_lengths (Tensor): Describe actual sequence length of each input with data type of int64.
8248
- actual_seq_lengths_kv (Tensor): Describe actual sequence length of each input with data type of int64.
8249
- pse_shift (Tensor) - The position encoding tensor with data type of float16 or float32.
8250
- dep_scale1 (Tensor)
8251
- quant_scale1 (Tensor)
8252
- deq_scale2 (Tensor)
8253
- quant_scale2 (Tensor)
8254
- quant_offset2 (Tensor)
8255
- num_heads (int): The number of heads.
8256
- scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
8257
- Muls in the calculation. Default: 1.0.
8258
- pre_tokens (int): Previous tokens. Default: 2147483547.
8259
- next_tokens (int): next tokens. Default: 0.
8260
- indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
8261
- indicates that the data blocks in the upper triangle are not involved in the calculation
8262
- input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
8263
- num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
8264
- The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
8265
- sparse_mode (int): Default: 0
8266
- inner_precise (int): 0, float16 high precision. 1, high performance. default 1
9059
+ - q_S must be greater than or equal to the query's S length, and kv_S must be greater than or
9060
+ equal to the key's S length.
9061
+ - If `pse_shift` has dtype float16, `query` should have dtype float16 or int8, in which case high
9062
+ precision mode is enabled automatically.
9063
+ - If `pse_shift` has dtype bfloat16, `query` should have dtype bfloat16.
9064
+
9065
+ deq_scale1 (Tensor, optional): Quantization parameter, the tensor with data type of uint64 or float32.
9066
+ Input Tensor of shape :math:`(1,)`.
9067
+ Default: ``None``.
9068
+ quant_scale1 (Tensor, optional): Quantization parameter, the tensor with data type of float32.
9069
+ Input Tensor of shape :math:`(1,)`.
9070
+ Default: ``None``.
9071
+ deq_scale2 (Tensor, optional): Quantization parameter, input Tensor of shape :math:`(1,)` and it has
9072
+ the same dtype as `deq_scale1`.
9073
+ Default: ``None``.
9074
+ quant_scale2 (Tensor, optional): Quantization parameter, the tensor with data type of float32 or bfloat16.
9075
+ The suggested shape is :math:`(1,)` / :math:`(1, 1, q_H)` / :math:`(q_H, )` when output layout is BSH,
9076
+ :math:`(1,)` / :math:`(1, q_N, 1, D)` / :math:`(q_N, D)` when layout is BNSD.
9077
+ Default: ``None``.
9078
+ quant_offset2 (Tensor, optional): Quantization parameter, the tensor with data type of float32 or bfloat16.
9079
+ It has the same dtype and shape as `quant_scale2`.
9080
+ Default: ``None``.
9081
+ num_heads (int, optional): The number of heads. It is an integer in range (0, 256].
9082
+ Default: ``1``.
9083
+ scale_value (double, optional): The scale value indicating the scale coefficient, which is used as the scalar of
9084
+ Muls in the calculation.
9085
+ Default: ``1.0``.
9086
+ pre_tokens (int, optional): For sparse computing, indicating the number of previous tokens Attention needs to be
9087
+ associated with.
9088
+ Default: ``2147483647``.
9089
+ next_tokens (int, optional): For sparse computing, indicating the number of next tokens Attention needs to be
9090
+ associated with.
9091
+ Default: ``0``.
9092
+ input_layout (str, optional): the data layout of the input qkv, support `(BSH)` and `(BNSD)`.
9093
+ Default: ``'BSH'``.
9094
+ num_key_value_heads (int, optional): An int indicates head numbers of ``key``/``value`` which are used
9095
+ in GQA algorithm. The value 0 indicates if the key and value have the same head nums, use `num_heads`.
9096
+ If it is specified (not 0), it must be a factor of `num_heads` and it must be equal to kv_n.
9097
+ Default: ``0``.
9098
+ sparse_mode (int, optional): An int specifies sparse mode, can be int from {0, 1, 2, 3, 4}.
9099
+ Default: ``0``.
9100
+
9101
+ - `sparse_mode` = 0: If `attn_mask` is not provided (``None``), `pre_tokens` and `next_tokens` inputs are ignored
9102
+ (internally set to INT_MAX).
9103
+ - `sparse_mode` = 2, 3, 4: `attn_mask` shape must be :math:`(S, S)` or :math:`(1, S, S)` or
9104
+ :math:`(1, 1, S, S)`, with S fixed at 2048. User must ensure that `attn_mask` is lower triangular.
9105
+ If not provided or incorrect shape, it will result in an error.
9106
+ - `sparse_mode` = 1, 2, 3: Ignores `pre_tokens`, `next_tokens` inputs and sets values according
9107
+ to specific rules.
9108
+ - `sparse_mode` = 4: `pre_tokens` and `next_tokens` must be non-negative.
9109
+
9110
+ inner_precise (int, optional): An int number from {0, 1} indicates computing mode.
9111
+ ``0`` for high precision mode for float16 dtype. ``1`` for high performance mode.
9112
+ Default: ``1``.
8267
9113
 
9114
+ Returns:
9115
+ attention_out (Tensor) - Output tensor, has the same shape as `query` of
9116
+ :math:`(B, q_S, q_H)` / :math:`(B, q_N, q_S, q_D)`.
9117
+ Output dtype is determined by multiple factors, please refer to Note above for details.
8268
9118
 
8269
- Outputs:
8270
- attention_out (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
9119
+ Raises:
9120
+ TypeError: Dtype of `query` is not int8, float16 or bfloat16.
9121
+ TypeError: `query`, `key` and `value` don't have the same dtype.
9122
+ TypeError: Dtype of `attn_mask` is not bool, int8 or uint8.
9123
+ TypeError: Dtype of `pse_shift` is not bfloat16 or float16.
9124
+ TypeError: `scale_value` is not a double number.
9125
+ TypeError: `input_layout` is not a string.
9126
+ TypeError: `num_key_value_heads` is not an int.
9127
+ TypeError: `sparse_mode` is not an int.
9128
+ TypeError: `inner_precise` is not an int.
9129
+ TypeError: `quant_scale1` is not Tensor of type float32.
9130
+ TypeError: `deq_scale1` is not Tensor of type uint64 or float32.
9131
+ TypeError: `quant_scale2` is not Tensor of type float32.
9132
+ TypeError: `deq_scale2` is not Tensor of type uint64 or float32.
9133
+ TypeError: `quant_offset2` is not Tensor of type float32.
9134
+ ValueError: `input_layout` is a string but is neither ``'BSH'`` nor ``'BNSD'``.
9135
+ RuntimeError: `num_heads` is not divisible by `num_key_value_heads`.
9136
+ RuntimeError: `num_heads` is not greater than 0.
9137
+ RuntimeError: `num_key_value_heads` is not greater than or equal to 0.
9138
+ RuntimeError: kv_n is not equal to `num_key_value_heads`.
9139
+ RuntimeError: `attn_mask` shape is not valid.
9140
+ RuntimeError: `sparse_mode` is specified but is not 0, 1, 2, 3 or 4.
9141
+ RuntimeError: `query` dtype is bfloat16 and D axis is not aligned with 16.
9142
+ RuntimeError: `input_layout` is BSH and kv_h is not divisible by `num_key_value_heads`.
9143
+ RuntimeError: D-axis of `query`, `key` and `value` is not the same.
9144
+ RuntimeError: In post quant per-channel scenario, D-axis is not 32 Byte aligned.
8271
9145
 
8272
- Supported Platforms:
9146
+ Supported Platforms:
8273
9147
  ``Ascend``
8274
9148
 
8275
9149
  Examples:
8276
- >>> from mindspore.ops.function.nn_func import prompt_flash_attention
8277
- >>> from mindspore import Tensor
9150
+ >>> from mindspore import Tensor, ops
8278
9151
  >>> import numpy as np
8279
9152
  >>> B = 1
8280
9153
  >>> N = 16
@@ -8284,13 +9157,13 @@ def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths, act
8284
9157
  >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
8285
9158
  >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
8286
9159
  >>> out = ops.prompt_flash_attention(query, key, value, None, None, None, None, None, None, None, None,
8287
- None, N, input_layout='BNSD')
9160
+ ... None, N, input_layout='BNSD')
8288
9161
  >>> print(out.shape)
8289
9162
  (1, 16, 256, 16)
8290
9163
  """
8291
9164
 
8292
- pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
8293
- num_key_value_heads, sparse_mode, inner_precise)
9165
+ pfa = _get_cache_prim(PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
9166
+ num_key_value_heads, sparse_mode, inner_precise)
8294
9167
  return pfa(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift, deq_scale1,
8295
9168
  quant_scale1, deq_scale2, quant_scale2, quant_offset2)
8296
9169
 
@@ -8301,21 +9174,17 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8301
9174
  num_heads=1, input_layout='BSH', scale_value=1.0, num_key_value_heads=0,
8302
9175
  block_size=0, inner_precise=1, kv_padding_size=None):
8303
9176
  r"""
8304
- B -- Batch size
8305
-
8306
- N -- Num heads
8307
-
8308
- kvN -- Num key value heads
8309
-
8310
- S -- Sequence length
9177
+ The interface for incremental inference.
8311
9178
 
8312
- D -- Head dim
9179
+ - B: Batch size
9180
+ - N: Num of attention heads
9181
+ - kvN: Num of `key` / `value` heads
9182
+ - S: Sequence length
9183
+ - D: Head dim
9184
+ - H: Hidden layer size
9185
+ - kvH: Hidden size of `key` / `value`
8313
9186
 
8314
- H -- Hidden size
8315
-
8316
- kvH -- Hidden size of key value
8317
-
8318
- where :math:`H=N\times D`, :math:`kvH=kvN\times D`
9187
+ where :math:`H=N\times D`, :math:`kvH=kvN\times D`.
8319
9188
 
8320
9189
  Self attention constructs an attention model based on the relationship between input samples themselves. The
8321
9190
  principle is to assume that there is an input sample sequence :math:`x` of length :math:`n`, and each
@@ -8330,62 +9199,62 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8330
9199
  becoming too large, it is usually scaled by dividing it by the square root of :math:`d`, and softmax
8331
9200
  normalization is performed on each row, yielding a matrix of :math:`n\times d` after multiplying by :math:`V`.
8332
9201
 
8333
- .. warning::
8334
- This is an experimental API that is subject to change or deletion.
8335
-
8336
9202
  Note:
8337
- - If there is no input parameter and no default value, None needs to be passed.
8338
- - The shape of the tensor corresponding to the key and value parameters needs to be completely consistent.
8339
- - :math:`N` of parameter query is equal with num_heads. :math:`N` of parameter key and parameter value is equal
8340
- with num_key_value_heads. num_heads is a multiple of num_key_value_heads.
9203
+ - If there is no input parameter and no default value, ``None`` needs to be passed.
9204
+ - The shape of the tensor corresponding to the `key` and `value` parameters needs to be completely consistent.
9205
+ - :math:`N` of parameter `query` is equal with `num_heads`. :math:`N` of parameter `key` and parameter `value`
9206
+ is equal with `num_key_value_heads`. `num_heads` is a multiple of `num_key_value_heads`.
8341
9207
  - Quantization
8342
9208
 
8343
- - When the data type of query, key, and value is float16 and the data type of output is int8, the input
8344
- parameter quant_scale2 is required and quant_offset2 is optional.
8345
- - When antiquant_scale exists, key and value need to be passed by int8. antiquant_offset is optional.
8346
- - The data type of antiquant_scale and antiquant_offset should be consistency with that of query.
8347
- - pse_shift
9209
+ - When the data type of `query`, `key`, and `value` is float16 and the data type of output is int8, the input
9210
+ parameter `quant_scale2` is required and `quant_offset2` is optional.
9211
+ - When `antiquant_scale` exists, `key` and `value` need to be passed by int8. `antiquant_offset` is optional.
9212
+ - The data type of `antiquant_scale` and `antiquant_offset` should be consistent with that of `query`.
9213
+ - `pse_shift`
8348
9214
 
8349
- - The pse_shift data type needs to be consistent with the query data type, and only supports D-axis alignment,
9215
+ - The `pse_shift` data type needs to be consistent with `query`, and only supports D-axis alignment,
8350
9216
  which means that the D-axis can be divided by 16.
8351
9217
  - Page attention:
8352
9218
 
8353
- - The necessary condition for enabling page attention is that the block_table exists, and the key
8354
- and value are arranged in a contiguous memory according to the index in the block_table. The support for
8355
- key and value dtypes is float16/bfloat16/int8.
8356
- - In the enabling scenario of page attention, 16 alignment is required when input types of key and value are
8357
- float16/bfloat16, and 32 alignment is required when input types of key and value are int8. It is
8358
- recommended to use 128.
9219
+ - The necessary condition for enabling page attention is that the `block_table` exists, and the `key`
9220
+ and `value` are arranged in a contiguous memory according to the index in the `block_table`. The support
9221
+ dtype for `key` and `value` is float16/bfloat16/int8.
9222
+ - In the enabling scenario of page attention, 16 alignment is required when input types of `key`
9223
+ and `value` are float16/bfloat16, and 32 alignment is required when input dtype of `key` and `value`
9224
+ is int8. It is recommended to use 128.
8359
9225
  - The maximum max_block_num_per_seq currently supported by `block_table` is 16k, and exceeding 16k will result
8360
9226
  in interception and error messages; If you encounter :math:`S` being too large and causing
8361
- max_block_num_per_seq to exceed 16k, you can increase the block_size to solve the problem.
8362
- - The multiplication of all dimensions of the shape of the parameters key and value in the page attention
9227
+ max_block_num_per_seq to exceed 16k, you can increase the `block_size` to solve the problem.
9228
+ - The multiplication of all dimensions of the shape of the parameters `key` and `value` in the page attention
8363
9229
  scenario cannot exceed the representation range of int32.
8364
9230
  - When performing per-channel post quantization, page attention cannot be enabled simultaneously.
8365
- - kv_padding_size:
9231
+ - `kv_padding_size`:
8366
9232
 
8367
9233
  - The calculation formula for the starting point of KV cache transfer is
8368
9234
  :math:`S-kv\_padding\_size-actual\_seq\_lengths`. The calculation formula for the transfer endpoint of KV
8369
9235
  cache is :math:`S-kv\_padding\_size`. When the starting or ending point of the KV cache transfer is less
8370
9236
  than 0, the returned data result is all 0.
8371
- - When kv_padding_size is less than 0, it will be set to 0.
8372
- - kv_padding_size needs to be enabled together with the actual_seq_lengths parameter, otherwise it is
9237
+ - When `kv_padding_size` is less than 0, it will be set to 0.
9238
+ - `kv_padding_size` needs to be enabled together with the `actual_seq_lengths` parameter, otherwise it is
8373
9239
  considered as the KV right padding scene.
8374
9240
  - It needs to be enabled together with the `attn_mask` parameter, and the meaning of `attn_mask` must be
8375
9241
  correct, that is, it can correctly hide invalid data. Otherwise, it will introduce accuracy issues.
8376
- - kv_padding_size does not support page attention scenarios
9242
+ - `kv_padding_size` does not support page attention scenarios.
9243
+
9244
+ .. warning::
9245
+ Only supported on Atlas A2 training series.
8377
9246
 
8378
9247
  Args:
8379
9248
  query (Tensor): The query tensor with data type of float16 or bfloat16.
8380
9249
  The shape is :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
8381
- key (TensorList): The key tensor with data type of float16 or bfloat16 or int8.
9250
+ key (Union[tuple, list]): The key tensor with data type of float16 or bfloat16 or int8.
8382
9251
  The shape is :math:`(B, S, kvH)` / :math:`(B, kvN, S, D)`.
8383
- value (TensorList): The value tensor with data type of float16 or bfloat16 or int8.
9252
+ value (Union[tuple, list]): The value tensor with data type of float16 or bfloat16 or int8.
8384
9253
  The shape is :math:`(B, S, kvH)` / :math:`(B, kvN, S, D)`.
8385
9254
  attn_mask (Tensor, optional): The attention mask tensor with data type of bool or int8 or uint8.
8386
9255
  The shape is :math:`(B, S)` / :math:`(B, 1, S)` / :math:`(B, 1, 1, S)`. Default: ``None``.
8387
9256
  actual_seq_lengths (Union[Tensor, tuple[int], list[int]], optional): Describe actual sequence length of each
8388
- input with data type of int32 or int64. The shape is :math:`(B, )`. Default: ``None``.
9257
+ input with data type of int64. The shape is :math:`(B, )`. Default: ``None``.
8389
9258
  pse_shift (Tensor, optional): The position encoding tensor with data type of float16 or bfloat16. Input tensor
8390
9259
  of shape :math:`(1, N, 1, S)` / :math:`(B, N, 1, S)`. Default: ``None``.
8391
9260
  dequant_scale1 (Tensor, optional): Quantization parameter, the tensor with data type of uint64 or float32. It
@@ -8400,22 +9269,25 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8400
9269
  The shape is :math:`(1,)`. Default: ``None``.
8401
9270
  antiquant_scale (Tensor, optional): Pseudo-quantization parameter, the tensor with data type of float16 or
8402
9271
  bfloat16. The shape is :math:`(2, kvN, 1, D)` when input_layout is 'BNSD' or :math:`(2, kvH)` when
8403
- input_layout is 'BSH'. Default: ``None``.
9272
+ input_layout is 'BSH'. Default: ``None``.
8404
9273
  antiquant_offset (Tensor, optional): Pseudo-quantization parameter, the tensor with data type of float16 or
8405
9274
  bfloat16. The shape is :math:`(2, kvN, 1, D)` when input_layout is 'BNSD' or :math:`(2, kvH)` when
8406
- input_layout is 'BSH'. Default: ``None``.
9275
+ input_layout is 'BSH'. Default: ``None``.
8407
9276
  block_table (Tensor, optional): The tensor with data type of int32. The shape is
8408
9277
  :math:`(B, max\_block\_num\_per\_seq)`,
8409
9278
  where :math:`max\_block\_num\_per\_seq = ceil(\frac{max(actual\_seq\_length)}{block\_size} )`.
8410
9279
  Default: ``None``.
8411
- num_heads (int): The number of heads.
8412
- input_layout (str): The data layout of the input qkv, support 'BSH' and 'BNSD'. Default ``'BSH'``.
8413
- scale_value (double): The scale value indicating the scale coefficient, which is used as the scalar of
8414
- Muls in the calculation. Default: ``1.0``.
8415
- num_key_value_heads (int): Head numbers of key/value which are used in GQA algorithm.
8416
- The value 0 indicates if the key and value have the same head nums, use numHeads. Default: ``0``.
8417
- block_size (int): The maximum number of tokens stored in each block of KV in page attention. Default: ``0``.
8418
- inner_precise (int): Default: ``1``.
9280
+ num_heads (int, optional): The number of heads. Default: ``1``.
9281
+ input_layout (str, optional): The data layout of the input qkv, support 'BSH' and 'BNSD'. Default: ``'BSH'``.
9282
+ scale_value (double, optional): The scale value indicating the scale coefficient, which is used as
9283
+ the scalar of Muls in the calculation. Default: ``1.0``.
9284
+ num_key_value_heads (int, optional): Head numbers of `key`/`value` which are used in GQA algorithm.
9285
+ The value 0 indicates if the `key` and `value` have the same head nums, use `num_heads`. Default: ``0``.
9286
+ block_size (int, optional): The maximum number of tokens stored in each block of KV in page attention.
9287
+ Default: ``0``.
9288
+ inner_precise (int, optional): An int number from {0, 1} indicates computing mode.
9289
+ ``0`` for high precision mode for float16 dtype. ``1`` for high performance mode.
9290
+ Default: ``1``.
8419
9291
  kv_padding_size (Tensor, optional): The tensor with data type of int64. The range of values is
8420
9292
  :math:`0\le kv\_padding\_size \le S-max(actual\_seq\_length)`. The shape is :math:`()` or :math:`(1,)`.
8421
9293
  Default: ``None``.
@@ -8423,6 +9295,25 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8423
9295
  Returns:
8424
9296
  attention_out (Tensor), the shape is :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
8425
9297
 
9298
+ Raises:
9299
+ TypeError: dtype of `query` is not float16 or bfloat16.
9300
+ TypeError: `key` and `value` don't have the same dtype.
9301
+ TypeError: dtype of `attn_mask` is not bool, int8 or uint8.
9302
+ TypeError: dtype of `pse_shift` is not bfloat16 or float16.
9303
+ TypeError: `scale_value` is not a double number.
9304
+ TypeError: `input_layout` is not a string.
9305
+ TypeError: `num_key_value_heads` or `num_heads` is not an int.
9306
+ TypeError: `inner_precise` is not an int.
9307
+ TypeError: `quant_scale1` is not Tensor of type float32.
9308
+ TypeError: `quant_scale2` is not Tensor of type float32.
9309
+ TypeError: `quant_offset2` is not Tensor of type float32.
9310
+ ValueError: size of `actual_seq_lengths` is not 1 or B.
9311
+ ValueError: `input_layout` is a string but is neither ``'BSH'`` nor ``'BNSD'``.
9312
+ ValueError: `num_heads` is not divisible by Q_H.
9313
+ ValueError: `num_heads` is not divisible by `num_key_value_heads`.
9314
+ RuntimeError: `num_heads` is not greater than 0.
9315
+ RuntimeError: `attn_mask` shape is not valid.
9316
+
8426
9317
  Supported Platforms:
8427
9318
  ``Ascend``
8428
9319
 
@@ -8435,7 +9326,7 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8435
9326
  >>> query = Tensor(np.random.randn(B, 1, N * D), mstype.float16)
8436
9327
  >>> key = [Tensor(np.random.randn(B, S, kvN * D), mstype.float16)]
8437
9328
  >>> value = [Tensor(np.random.randn(B, S, kvN * D), mstype.float16)]
8438
- >>> ifa_ms = ops.functional.incre_flash_attention
9329
+ >>> ifa_ms = ops.incre_flash_attention
8439
9330
  >>> attn_out = ifa_ms(query, key, value, num_heads=N, num_key_value_heads=kvN)
8440
9331
  >>> attn_out
8441
9332
  Tensor(shape=[1, 1, 512], dtype=Float16, value=
@@ -8458,7 +9349,7 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, sca
8458
9349
  Args:
8459
9350
  input (Tensor): The indices used to lookup in the `weight`. The data type must be mindspore.int32 or
8460
9351
  mindspore.int64, and the value should be in range `[0, weight.shape[0])`.
8461
- weight (Parameter): The matrix where to lookup from. The shape must be 2D.
9352
+ weight (Union[Parameter, Tensor]): The matrix where to lookup from. The shape must be 2D.
8462
9353
  padding_idx (int, optional): If the value is not None, the corresponding row of `weight` will not be updated
8463
9354
  in training. The value should be in range `[-weight.shape[0], weight.shape[0])` if it's not ``None``.
8464
9355
  Default ``None``.
@@ -8475,7 +9366,6 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, sca
8475
9366
  Raises:
8476
9367
  ValueError: If `padding_idx` is out of valid range.
8477
9368
  ValueError: If the shape of `weight` is invalid.
8478
- TypeError: `weight` is not a :class:`mindspore.Parameter`.
8479
9369
 
8480
9370
  Supported Platforms:
8481
9371
  ``Ascend``
@@ -8500,6 +9390,215 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, sca
8500
9390
  return embedding_op(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
8501
9391
 
8502
9392
 
9393
+ def speed_fusion_attention(query, key, value, head_num, input_layout, *, pse=None, padding_mask=None, atten_mask=None,
9394
+ scale=1.0, keep_prob=1.0, pre_tokens=2147483647, next_tokens=2147483647, inner_precise=0,
9395
+ prefix=None, actual_seq_qlen=None, actual_seq_kvlen=None, sparse_mode=0,
9396
+ gen_mask_parallel=True, sync=False, pse_type=1, q_start_idx=None, kv_start_idx=None):
9397
+ r"""
9398
+ The interface is used for self-attention fusion computing.
9399
+ If `pse_type` is ``1`` , the calculation formula is:
9400
+
9401
+ .. math::
9402
+ attention\_out = Dropout(Softmax(Mask(scale * (pse + query * key^{T}), atten\_mask)), keep\_prob) * value
9403
+
9404
+ If `pse_type` is any other valid value, the calculation formula is:
9405
+
9406
+ .. math::
9407
+ attention\_out = Dropout(Softmax(Mask(scale * (query * key^{T}) + pse, atten\_mask)), keep\_prob) * value
9408
+
9409
+ - B: Batch size. Value range 1 to 2k.
9410
+ - S1: Sequence length of query. Value range 1 to 512k.
9411
+ - S2: Sequence length of key and value. Value range 1 to 512k.
9412
+ - N1: Num heads of query. Value range 1 to 256.
9413
+ - N2: Num heads of key and value, and N2 must be a factor of N1.
9414
+ - D: Head size. The value must be a multiple of 16, with a maximum value of 512.
9415
+ - H1: Hidden size of query, which equals to N1 * D.
9416
+ - H2: Hidden size of key and value, which equals to N2 * D.
9417
+
9418
+ .. warning::
9419
+ - This is an experimental API that is subject to change or deletion.
9420
+ - Only supported on Atlas A2 training series.
9421
+
9422
+ Note:
9423
+ This interface is not supported in `graph mode (mode=mindspore.GRAPH_MODE)
9424
+ <https://www.mindspore.cn/tutorials/en/master/compile/static_graph.html>`_.
9425
+
9426
+ Args:
9427
+ query (Tensor): The query tensor. Input tensor of shape :math:`(B, S1, H1)`,
9428
+ :math:`(B, N1, S1, D)`, :math:`(S1, B, H1)`, :math:`(B, S1, N1, D)` or :math:`(T1, N1, D)`.
9429
+ key (Tensor): The key tensor. Input tensor of shape :math:`(B, S2, H2)`,
9430
+ :math:`(B, N2, S2, D)`, :math:`(S2, B, H2)`, :math:`(B, S2, N2, D)` or :math:`(T2, N2, D)`.
9431
+ value (Tensor): The value tensor. Input tensor of shape :math:`(B, S2, H2)`,
9432
+ :math:`(B, N2, S2, D)`, :math:`(S2, B, H2)`, :math:`(B, S2, N2, D)` or :math:`(T2, N2, D)`.
9433
+ The `key` and `value` should have the same shape.
9434
+ head_num (int): The head num of query, equal to N1.
9435
+ input_layout (str): Specifies the layout of input `query`, `key` and `value`. The value can be ``"BSH"`` ,
9436
+ ``"BNSD"`` , ``"SBH"`` , ``"BSND"`` or ``"TND"`` . ``"TND"`` is an experimental format.
9437
+ When `input_layout` is ``"TND"`` , the following restrictions must be met.
9438
+ There are two lists that represent the length of the input sequence: list_seq_q and list_seq_k. Each
9439
+ value in the list indicates the length of the sequence in the batch. For example, list_seq_q = [4, 2, 6],
9440
+ list_seq_k = [10, 3, 9]. Each element of the lists indicates S. T1 is sum(list_seq_q) = 12, T2 is
9441
+ sum(list_seq_k) = 22.
9442
+ max_seqlen_q = max(list_seq_q), max_seqlen_k = max(list_seq_k).
9443
+ qk_pointer = sum(list_seq_q * list_seq_k), which is the sum of the element multiplication.
9444
+
9445
+ - The lengths of two lists are the same, and size of list is batch. batch is less than or equal to 1024.
9446
+ - When `input_layout` is ``"TND"`` , `actual_seq_qlen` and `actual_seq_kvlen` must not be ``None`` .
9447
+ Otherwise, they are ``None`` .
9448
+ - The `actual_seq_qlen` and `actual_seq_kvlen` are the cumulative sums of the sequence lengths of query and
9449
+ key/value respectively, so they must be non-decreasing.
9450
+ - If `pse` is not ``None`` , list_seq_q and list_seq_k must be the same. The maximum value of list_seq_q and
9451
+ list_seq_k is greater than 1024. `pse` should be :math:`(B, N1, 1024, S2)` or
9452
+ :math:`(1, N1, 1024, S2)`, and S2 is equal to max_seqlen_k.
9453
+ - `atten_mask` must be a lower triangular matrix, so `sparse_mode` should be 2 or 3. The shape of
9454
+ `atten_mask` should be :math:`(2048, 2048)`.
9455
+ - Prefix is ``None`` .
9456
+ - `next_tokens` is 0, and `pre_tokens` is not less than max_seqlen_q.
9457
+ - When `sparse_mode` is 3, S1 of each batch should be less than or equal to S2.
9458
+ - 0 should not exist in list_seq_k.
9459
+
9460
+ Keyword Args:
9461
+ pse (Tensor, optional): The position embedding code, dtype is same as `query`. Default: ``None`` .
9462
+ If S is greater than 1024 and the mask of the lower triangle is used, pass only the last 1024 rows of
9463
+ the lower triangle for memory optimization. Input tensor of shape :math:`(B, N1, S1, S2)`,
9464
+ :math:`(1, N1, S1, S2)`, :math:`(B, N1, 1024, S2)`, :math:`(1, N1, 1024, S2)`.
9465
+
9466
+ - ALiBi scenario: `pse` must meet the ALiBi rule, and `sparse_mode` is 2 or 3 for the lower triangle.
9467
+ In this scenario, `pse` is :math:`(B, N1, 1024, S2)`, :math:`(1, N1, 1024, S2)`.
9468
+ - Non-ALiBi scenario: `pse` is :math:`(B, N1, S1, S2)`, :math:`(1, N1, S1, S2)`.
9469
+ - The shape of `pse` should be :math:`(B, N1, 1024, S2)` and :math:`(1, N1, 1024, S2)` when `input_layout`
9470
+ is ``"TND"`` .
9471
+ - If `pse_type` is 2 or 3, dtype of `pse` must be float32, and shape of `pse` should be :math:`(B, N1)` or
9472
+ :math:`(N1,)`.
9473
+
9474
+ padding_mask (Tensor, optional): Reserved parameter. Not implemented yet. Default: ``None`` .
9475
+ atten_mask (Tensor, optional): The attention mask tensor. For each element, 0/False indicates retention and
9476
+ 1/True indicates discard. Input tensor of shape :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`,
9477
+ :math:`(S1, S2)` or :math:`(2048, 2048)`. Default: ``None`` .
9478
+
9479
+ - In compression scenario, `sparse_mode` is 2, 3, or 4, `atten_mask` must be :math:`(2048, 2048)`.
9480
+ - When `sparse_mode` is 5, `atten_mask` must be :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`.
9481
+ - When `sparse_mode` is 0 or 1, `atten_mask` should be :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`,
9482
+ :math:`(S1, S2)`.
9483
+
9484
+ scale (float, optional): The scale factor of score. Generally, the value is 1.0 / (D ** 0.5). Default: ``1.0`` .
9485
+ keep_prob (float, optional): The keep probability of dropout. Value range is (0.0, 1.0]. Default: ``1.0`` .
9486
+ pre_tokens (int, optional): Parameter for sparse computation, represents how many tokens are counted forward.
9487
+ When `sparse_mode` is set to 1, 2, 3, or 5, this parameter does not take effect. Default: ``2147483647`` .
9488
+ next_tokens (int, optional): Parameter for sparse computation, represents how many tokens are counted backward.
9489
+ When `sparse_mode` is set to 1, 2, 3, or 5, this parameter does not take effect. Default: ``2147483647`` .
9490
+ The value of pre_tokens corresponds to S1, and the value of next_tokens corresponds to S2. They define the
9491
+ valid area on the `atten_mask` matrix. It must ensure that the band is not empty.
9492
+ The following values are not allowed:
9493
+
9494
+ - pre_tokens < 0 and next_tokens < 0.
9495
+ - (pre_tokens < 0 and next_tokens >= 0) and (next_tokens < abs(pre_tokens) or abs(pre_tokens) >= S2).
9496
+ - (pre_tokens >= 0 and next_tokens < 0) and (abs(next_tokens) > pre_tokens or abs(next_tokens) >= S1).
9497
+
9498
+ inner_precise (int, optional): The parameter is reserved and not implemented yet. Default: ``0`` .
9499
+ prefix (Union[tuple[int], list[int]], optional): N value of each Batch in the prefix sparse calculation
9500
+ scenario. Input tensor of shape :math:`(B,)`. B max value 32. Not none only when sparse_mode is 5.
9501
+ If S1 > S2, N ranges from 0 to S2. If S1 <= S2, N ranges from S2 - S1 to S2. Default: ``None`` .
9502
+ actual_seq_qlen (Union[tuple[int], list[int]], optional): Size of query corresponding to each batch, array
9503
+ with increasing values and the last value equal to T1. Default: ``None`` .
9504
+ actual_seq_kvlen (Union[tuple[int], list[int]], optional): Size of key and value corresponding to each batch,
9505
+ array with increasing values and the last value equal to T2. Default: ``None`` .
9506
+ sparse_mode (int, optional): Indicates sparse mode. Default ``0`` .
9507
+
9508
+ - 0: Indicates the defaultMask mode. If `atten_mask` is not passed, the mask operation is not performed,
9509
+ and preTokens and nextTokens(internally assigned as INT_MAX) are ignored. If passed in, the full
9510
+ `atten_mask` matrix (S1 * S2) needs to be passed in, indicating that the part between preTokens and
9511
+ nextTokens needs to be calculated.
9512
+ - 1: Represents allMask, that is, passing in the complete `atten_mask` matrix.
9513
+ - 2: Represents the leftUpCausal mode, which corresponds to the lower triangle scenario divided by the left
9514
+ vertex, and the optimized `atten_mask` matrix (2048*2048) is required.
9515
+ - 3: Represents the rightDownCausal mode, which corresponds to the lower triangle scenario divided by the lower
9516
+ right vertex, and the optimized `atten_mask` matrix (2048*2048) is required.
9517
+ - 4: Represents the band scenario, that is, the part between counting preTokens and nextTokens, and the
9518
+ optimized `atten_mask` matrix (2048*2048) is required.
9519
+ - 5: Represents the prefix scenario, that is, on the basis of rightDownCausal, a matrix with length S1 and
9520
+ width N is added to the left side. The value of N is obtained by the new input prefix, and the N value
9521
+ of each Batch axis is different. Currently not enabled.
9522
+ - 6: Represents the global scenario. Currently not enabled.
9523
+ - 7: Represents the dilated scenario. Currently not enabled.
9524
+ - 8: Represents the block_local scenario. Currently not enabled.
9525
+
9526
+ gen_mask_parallel (bool, optional): Debug parameter, a switch to control dropout_gen_mask execution method.
9527
+ If ``True`` , dropout_gen_mask is executed in parallel. If ``False`` , execution is serial.
9528
+ Not implemented yet. Default: ``True`` .
9529
+ sync (bool, optional): Debug parameter, a switch to control dropout_gen_mask execution method.
9530
+ If ``True`` , dropout_gen_mask is executed synchronously. If ``False`` , execution is asynchronous.
9531
+ Not implemented yet. Default: ``False`` .
9532
+ pse_type (int, optional): Indicates how to use `pse`. Default ``1`` .
9533
+
9534
+ - 0: `pse` is passed from outside, and the calculation process is to first multiply by `scale` and then add `pse`.
9535
+ - 1: `pse` is passed from outside, and the calculation process is to add `pse` first and then multiply by `scale`.
9536
+ - 2: `pse` is generated internally and generates standard alibi position information. The internally
9537
+ generated alibi matrix 0 line is aligned with the upper left corner of :math:`query * key^{T}`.
9538
+ - 3: `pse` is generated internally, and the generated alibi position information is the square root of the
9539
+ standard alibi position information. The internally generated alibi matrix 0 line is aligned with
9540
+ the upper left corner of :math:`query * key^{T}`.
9541
+
9542
+ q_start_idx (Union[tuple[int], list[int]], optional): Int array with length 1. Default: ``None`` .
9543
+ When pse_type is configured as ``2`` or ``3`` , it indicates the number of cells that the internally
9544
+ generated alibi code is offset in the S1 direction. A positive number indicates that 0 moves diagonally
9545
+ upward.
9546
+ kv_start_idx (Union[tuple[int], list[int]], optional): Int array with length 1. Default: ``None`` .
9547
+ When pse_type is configured as ``2`` or ``3`` , it indicates the number of cells that the internally
9548
+ generated alibi code is offset in the S2 direction. A positive number indicates that 0 moves diagonally
9549
+ upward.
9550
+
9551
+ Returns:
9552
+ A tuple of tensors containing `attention_out`, `softmax_max`, `softmax_sum`, `softmax_out`, `seed`, `offset`
9553
+ and `numels` .
9554
+
9555
+ - `attention_out` is the output of attention; its shape and data type are the same as those of `query`.
9556
+ - `softmax_max` is the max intermediate result calculated by Softmax, used for grad calculation.
9557
+ - `softmax_sum` is the sum intermediate result calculated by Softmax, used for grad calculation.
9558
+ - `softmax_out` is a reserved parameter.
9559
+ - `seed` is generated seed, used for Dropout.
9560
+ - `offset` is generated offset, used for Dropout.
9561
+ - `numels` is the length of generated dropout_mask.
9562
+
9563
+ Raises:
9564
+ TypeError: `query`, `key` and `value` don't have the same dtype.
9565
+ TypeError: Dtype of `atten_mask` is not bool or uint8.
9566
+ TypeError: `scale` or `keep_prob` is not a float number.
9567
+ TypeError: `input_layout` is not a string.
9568
+ TypeError: `head_num` is not an int.
9569
+ TypeError: `sparse_mode` is not an int.
9570
+ TypeError: `pse` is not Tensor type.
9571
+ TypeError: `padding_mask` is not Tensor type.
9572
+ TypeError: `atten_mask` is not Tensor type.
9573
+ TypeError: `pse_type` is not an int.
9574
+ ValueError: `input_layout` is a string but not valid.
9575
+ ValueError: The specified value of `sparse_mode` is invalid.
9576
+ ValueError: The specified value of `pse_type` is invalid.
9577
+
9578
+ Supported Platforms:
9579
+ ``Ascend``
9580
+
9581
+ Examples:
9582
+ >>> import mindspore
9583
+ >>> import mindspore.common.dtype as mstype
9584
+ >>> import numpy as np
9585
+ >>> from mindspore import ops, Tensor
9586
+ >>> query = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
9587
+ >>> key = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
9588
+ >>> value = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
9589
+ >>> head_num = 4
9590
+ >>> input_layout = "BSH"
9591
+ >>> output = ops.speed_fusion_attention(query, key, value, head_num, input_layout)
9592
+ >>> print(output[0].shape)
9593
+ (2, 4, 64)
9594
+ """
9595
+ seed, offset = default_generator._step(generator_step_) # pylint: disable=protected-access
9596
+ return speed_fusion_attention_op(query, key, value, head_num, input_layout, seed, offset, pse, padding_mask,
9597
+ atten_mask, scale, keep_prob, pre_tokens, next_tokens, inner_precise, prefix,
9598
+ actual_seq_qlen, actual_seq_kvlen, sparse_mode, gen_mask_parallel, sync, pse_type,
9599
+ q_start_idx, kv_start_idx)
9600
+
9601
+
8503
9602
  __all__ = [
8504
9603
  'adaptive_avg_pool1d',
8505
9604
  'adaptive_avg_pool2d',
@@ -8530,11 +9629,15 @@ __all__ = [
8530
9629
  'fast_gelu',
8531
9630
  'fractional_max_pool2d',
8532
9631
  'fractional_max_pool3d',
9632
+ 'speed_fusion_attention',
8533
9633
  'pixel_shuffle',
8534
9634
  'pixel_unshuffle',
8535
9635
  'hardshrink',
8536
9636
  'is_floating_point',
8537
9637
  'incre_flash_attention',
9638
+ 'prompt_flash_attention',
9639
+ 'flash_attention_score',
9640
+ 'fused_infer_attention_score',
8538
9641
  'flip',
8539
9642
  'fliplr',
8540
9643
  'flipud',
@@ -8555,7 +9658,6 @@ __all__ = [
8555
9658
  'softplus',
8556
9659
  'selu',
8557
9660
  'silu',
8558
- 'soft_margin_loss',
8559
9661
  'softmax',
8560
9662
  'softmin',
8561
9663
  'pdist',
@@ -8577,6 +9679,7 @@ __all__ = [
8577
9679
  'conv2d',
8578
9680
  'conv_transpose2d',
8579
9681
  'sigmoid',
9682
+ 'soft_margin_loss',
8580
9683
  'logsigmoid',
8581
9684
  'relu',
8582
9685
  'relu6',
@@ -8594,6 +9697,8 @@ __all__ = [
8594
9697
  'gaussian_nll_loss',
8595
9698
  'lp_pool1d',
8596
9699
  'lp_pool2d',
9700
+ 'moe_token_permute',
9701
+ 'moe_token_unpermute',
8597
9702
  'max_unpool1d',
8598
9703
  'max_unpool2d',
8599
9704
  'max_unpool3d',
@@ -8605,5 +9710,6 @@ __all__ = [
8605
9710
  'add_layer_norm',
8606
9711
  'group_norm',
8607
9712
  'rms_norm',
9713
+ 'add_rms_norm',
8608
9714
  ]
8609
9715
  __all__.sort()