mindspore 2.4.10__cp39-cp39-win_amd64.whl → 2.6.0__cp39-cp39-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (579)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +13 -6
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_check_jit_forbidden_api.py +3 -0
  7. mindspore/_checkparam.py +3 -38
  8. mindspore/_deprecated/__init__.py +17 -0
  9. mindspore/_deprecated/jit.py +198 -0
  10. mindspore/_extends/builtin_operations.py +1 -1
  11. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  12. mindspore/_extends/parse/__init__.py +6 -7
  13. mindspore/_extends/parse/compile_config.py +83 -0
  14. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  15. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
  16. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  17. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  18. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  19. mindspore/_extends/parse/parser.py +47 -198
  20. mindspore/_extends/parse/resources.py +1 -5
  21. mindspore/_extends/parse/standard_method.py +229 -99
  22. mindspore/_extends/pijit/__init__.py +2 -2
  23. mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
  24. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  25. mindspore/_extends/utils.py +1 -1
  26. mindspore/amp.py +11 -5
  27. mindspore/avcodec-59.dll +0 -0
  28. mindspore/avdevice-59.dll +0 -0
  29. mindspore/avfilter-8.dll +0 -0
  30. mindspore/avformat-59.dll +0 -0
  31. mindspore/avutil-57.dll +0 -0
  32. mindspore/boost/__init__.py +2 -2
  33. mindspore/boost/base.py +3 -7
  34. mindspore/boost/boost_cell_wrapper.py +138 -43
  35. mindspore/common/__init__.py +6 -3
  36. mindspore/common/_grad_function.py +56 -0
  37. mindspore/common/_pijit_context.py +14 -5
  38. mindspore/common/_register_for_tensor.py +1 -2
  39. mindspore/common/_stub_tensor.py +30 -14
  40. mindspore/common/_tensor_cpp_method.py +17 -0
  41. mindspore/common/_tensor_docs.py +4760 -0
  42. mindspore/common/api.py +480 -372
  43. mindspore/common/auto_dynamic_shape.py +41 -44
  44. mindspore/common/dtype.py +39 -36
  45. mindspore/common/dump.py +9 -6
  46. mindspore/common/file_system.py +9 -1
  47. mindspore/common/generator.py +5 -0
  48. mindspore/common/hook_handle.py +6 -2
  49. mindspore/common/initializer.py +13 -10
  50. mindspore/common/jit_begin_end.py +94 -0
  51. mindspore/common/jit_config.py +6 -1
  52. mindspore/common/jit_context.py +76 -0
  53. mindspore/common/jit_trace.py +378 -0
  54. mindspore/common/lazy_inline.py +9 -3
  55. mindspore/common/mindir_util.py +10 -2
  56. mindspore/common/mutable.py +5 -4
  57. mindspore/common/parameter.py +135 -52
  58. mindspore/common/seed.py +2 -2
  59. mindspore/common/sparse_tensor.py +23 -17
  60. mindspore/common/tensor.py +975 -1981
  61. mindspore/communication/__init__.py +7 -5
  62. mindspore/communication/_comm_helper.py +52 -2
  63. mindspore/communication/comm_func.py +240 -181
  64. mindspore/communication/management.py +95 -26
  65. mindspore/context.py +324 -573
  66. mindspore/dataset/__init__.py +65 -37
  67. mindspore/dataset/audio/__init__.py +2 -8
  68. mindspore/dataset/audio/transforms.py +3 -17
  69. mindspore/dataset/callback/ds_callback.py +2 -1
  70. mindspore/dataset/core/config.py +87 -6
  71. mindspore/dataset/engine/cache_admin.py +3 -3
  72. mindspore/dataset/engine/cache_client.py +6 -5
  73. mindspore/dataset/engine/datasets.py +292 -267
  74. mindspore/dataset/engine/datasets_audio.py +22 -8
  75. mindspore/dataset/engine/datasets_standard_format.py +46 -27
  76. mindspore/dataset/engine/datasets_text.py +78 -48
  77. mindspore/dataset/engine/datasets_user_defined.py +183 -117
  78. mindspore/dataset/engine/datasets_vision.py +120 -44
  79. mindspore/dataset/engine/iterators.py +283 -63
  80. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  81. mindspore/dataset/engine/obs/util.py +8 -0
  82. mindspore/dataset/engine/queue.py +40 -0
  83. mindspore/dataset/engine/samplers.py +289 -43
  84. mindspore/dataset/engine/serializer_deserializer.py +3 -2
  85. mindspore/dataset/engine/validators.py +53 -11
  86. mindspore/dataset/text/__init__.py +7 -6
  87. mindspore/dataset/text/transforms.py +6 -5
  88. mindspore/dataset/text/utils.py +3 -3
  89. mindspore/dataset/transforms/__init__.py +0 -9
  90. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  91. mindspore/dataset/transforms/transforms.py +31 -14
  92. mindspore/dataset/utils/browse_dataset.py +1 -1
  93. mindspore/dataset/vision/__init__.py +2 -9
  94. mindspore/dataset/vision/transforms.py +202 -158
  95. mindspore/dataset/vision/utils.py +7 -5
  96. mindspore/dataset/vision/validators.py +1 -2
  97. mindspore/device_context/__init__.py +21 -0
  98. mindspore/device_context/ascend/__init__.py +25 -0
  99. mindspore/device_context/ascend/device.py +72 -0
  100. mindspore/device_context/ascend/op_debug.py +153 -0
  101. mindspore/device_context/ascend/op_precision.py +193 -0
  102. mindspore/device_context/ascend/op_tuning.py +123 -0
  103. mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
  104. mindspore/device_context/cpu/device.py +62 -0
  105. mindspore/device_context/cpu/op_tuning.py +43 -0
  106. mindspore/device_context/gpu/__init__.py +21 -0
  107. mindspore/device_context/gpu/device.py +70 -0
  108. mindspore/device_context/gpu/op_precision.py +67 -0
  109. mindspore/device_context/gpu/op_tuning.py +175 -0
  110. mindspore/device_manager.py +170 -0
  111. mindspore/dnnl.dll +0 -0
  112. mindspore/experimental/es/embedding_service.py +35 -27
  113. mindspore/experimental/llm_boost/__init__.py +1 -0
  114. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  115. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +209 -0
  116. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  117. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  118. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  119. mindspore/experimental/llm_boost/register.py +1 -0
  120. mindspore/experimental/map_parameter.py +4 -4
  121. mindspore/experimental/optim/adadelta.py +6 -6
  122. mindspore/experimental/optim/adagrad.py +4 -4
  123. mindspore/experimental/optim/adam.py +7 -0
  124. mindspore/experimental/optim/adamax.py +4 -4
  125. mindspore/experimental/optim/adamw.py +4 -0
  126. mindspore/experimental/optim/asgd.py +1 -1
  127. mindspore/experimental/optim/lr_scheduler.py +73 -46
  128. mindspore/experimental/optim/radam.py +34 -31
  129. mindspore/experimental/optim/rprop.py +1 -1
  130. mindspore/experimental/optim/sgd.py +1 -1
  131. mindspore/hal/contiguous_tensors_handle.py +6 -10
  132. mindspore/hal/device.py +55 -53
  133. mindspore/hal/event.py +52 -52
  134. mindspore/hal/memory.py +179 -120
  135. mindspore/hal/stream.py +150 -109
  136. mindspore/include/api/context.h +0 -1
  137. mindspore/include/dataset/constants.h +7 -4
  138. mindspore/include/dataset/execute.h +2 -2
  139. mindspore/jpeg62.dll +0 -0
  140. mindspore/log.py +50 -0
  141. mindspore/mindrecord/__init__.py +21 -8
  142. mindspore/mindrecord/config.py +17 -316
  143. mindspore/mindrecord/filereader.py +1 -9
  144. mindspore/mindrecord/filewriter.py +5 -15
  145. mindspore/mindrecord/mindpage.py +1 -9
  146. mindspore/mindspore_backend_common.dll +0 -0
  147. mindspore/mindspore_backend_manager.dll +0 -0
  148. mindspore/mindspore_common.dll +0 -0
  149. mindspore/mindspore_core.dll +0 -0
  150. mindspore/mindspore_dump.dll +0 -0
  151. mindspore/mindspore_frontend.dll +0 -0
  152. mindspore/mindspore_glog.dll +0 -0
  153. mindspore/mindspore_memory_pool.dll +0 -0
  154. mindspore/mindspore_ms_backend.dll +0 -0
  155. mindspore/mindspore_ops.dll +0 -0
  156. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  157. mindspore/mindspore_ops_kernel_common.dll +0 -0
  158. mindspore/mindspore_profiler.dll +0 -0
  159. mindspore/mindspore_pyboost.dll +0 -0
  160. mindspore/mindspore_pynative.dll +0 -0
  161. mindspore/mindspore_res_manager.dll +0 -0
  162. mindspore/mindspore_runtime_pipeline.dll +0 -0
  163. mindspore/mint/__init__.py +798 -761
  164. mindspore/mint/distributed/__init__.py +70 -4
  165. mindspore/mint/distributed/distributed.py +2679 -44
  166. mindspore/mint/linalg/__init__.py +8 -0
  167. mindspore/mint/nn/__init__.py +743 -22
  168. mindspore/mint/nn/functional.py +716 -23
  169. mindspore/mint/nn/layer/__init__.py +21 -4
  170. mindspore/mint/nn/layer/_functions.py +334 -0
  171. mindspore/mint/nn/layer/activation.py +276 -1
  172. mindspore/mint/nn/layer/basic.py +123 -0
  173. mindspore/mint/nn/layer/conv.py +933 -0
  174. mindspore/mint/nn/layer/normalization.py +223 -28
  175. mindspore/mint/nn/layer/padding.py +797 -0
  176. mindspore/mint/nn/layer/pooling.py +235 -0
  177. mindspore/mint/optim/__init__.py +3 -1
  178. mindspore/mint/optim/adam.py +223 -0
  179. mindspore/mint/optim/adamw.py +26 -19
  180. mindspore/mint/optim/sgd.py +171 -0
  181. mindspore/mint/special/__init__.py +2 -1
  182. mindspore/multiprocessing/__init__.py +5 -0
  183. mindspore/nn/__init__.py +4 -1
  184. mindspore/nn/cell.py +1373 -192
  185. mindspore/nn/dynamic_lr.py +2 -1
  186. mindspore/nn/layer/activation.py +29 -27
  187. mindspore/nn/layer/basic.py +51 -35
  188. mindspore/nn/layer/channel_shuffle.py +3 -3
  189. mindspore/nn/layer/container.py +1 -1
  190. mindspore/nn/layer/conv.py +53 -42
  191. mindspore/nn/layer/embedding.py +12 -11
  192. mindspore/nn/layer/normalization.py +56 -49
  193. mindspore/nn/layer/padding.py +4 -3
  194. mindspore/nn/layer/pooling.py +120 -42
  195. mindspore/nn/layer/rnn_cells.py +1 -1
  196. mindspore/nn/layer/rnns.py +2 -1
  197. mindspore/nn/layer/timedistributed.py +5 -5
  198. mindspore/nn/layer/transformer.py +59 -36
  199. mindspore/nn/learning_rate_schedule.py +8 -4
  200. mindspore/nn/loss/loss.py +58 -55
  201. mindspore/nn/optim/ada_grad.py +7 -5
  202. mindspore/nn/optim/adadelta.py +11 -9
  203. mindspore/nn/optim/adafactor.py +1 -1
  204. mindspore/nn/optim/adam.py +19 -15
  205. mindspore/nn/optim/adamax.py +8 -7
  206. mindspore/nn/optim/adasum.py +5 -5
  207. mindspore/nn/optim/asgd.py +3 -1
  208. mindspore/nn/optim/ftrl.py +11 -9
  209. mindspore/nn/optim/lamb.py +1 -1
  210. mindspore/nn/optim/lars.py +1 -4
  211. mindspore/nn/optim/lazyadam.py +12 -10
  212. mindspore/nn/optim/momentum.py +7 -6
  213. mindspore/nn/optim/optimizer.py +3 -3
  214. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  215. mindspore/nn/optim/rmsprop.py +13 -12
  216. mindspore/nn/optim/rprop.py +11 -9
  217. mindspore/nn/optim/sgd.py +9 -6
  218. mindspore/nn/optim/tft_wrapper.py +5 -2
  219. mindspore/nn/optim/thor.py +2 -1
  220. mindspore/nn/probability/bijector/bijector.py +17 -11
  221. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  222. mindspore/nn/probability/bijector/invert.py +2 -2
  223. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  224. mindspore/nn/probability/bijector/softplus.py +3 -2
  225. mindspore/nn/probability/distribution/beta.py +3 -3
  226. mindspore/nn/probability/distribution/categorical.py +1 -1
  227. mindspore/nn/probability/distribution/cauchy.py +4 -2
  228. mindspore/nn/probability/distribution/exponential.py +6 -7
  229. mindspore/nn/probability/distribution/gamma.py +2 -2
  230. mindspore/nn/probability/distribution/gumbel.py +2 -2
  231. mindspore/nn/probability/distribution/half_normal.py +5 -3
  232. mindspore/nn/probability/distribution/logistic.py +5 -3
  233. mindspore/nn/probability/distribution/poisson.py +1 -1
  234. mindspore/nn/probability/distribution/uniform.py +5 -3
  235. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  236. mindspore/nn/reinforcement/tensor_array.py +1 -1
  237. mindspore/nn/utils/init.py +13 -11
  238. mindspore/nn/wrap/__init__.py +6 -6
  239. mindspore/nn/wrap/cell_wrapper.py +181 -122
  240. mindspore/nn/wrap/grad_reducer.py +45 -36
  241. mindspore/nn/wrap/loss_scale.py +6 -7
  242. mindspore/numpy/array_creations.py +63 -65
  243. mindspore/numpy/array_ops.py +149 -144
  244. mindspore/numpy/logic_ops.py +41 -42
  245. mindspore/numpy/math_ops.py +361 -359
  246. mindspore/numpy/utils.py +17 -18
  247. mindspore/numpy/utils_const.py +5 -6
  248. mindspore/opencv_core452.dll +0 -0
  249. mindspore/opencv_imgcodecs452.dll +0 -0
  250. mindspore/opencv_imgproc452.dll +0 -0
  251. mindspore/ops/__init__.py +5 -3
  252. mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
  253. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
  254. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  255. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  256. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  257. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  258. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  259. mindspore/ops/_register_for_op.py +0 -11
  260. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  261. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
  262. mindspore/ops/_vmap/vmap_array_ops.py +52 -25
  263. mindspore/ops/_vmap/vmap_base.py +0 -2
  264. mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
  265. mindspore/ops/_vmap/vmap_math_ops.py +15 -16
  266. mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
  267. mindspore/ops/auto_generate/__init__.py +4 -3
  268. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +258 -46
  269. mindspore/ops/auto_generate/gen_extend_func.py +757 -185
  270. mindspore/ops/auto_generate/gen_ops_def.py +4197 -2243
  271. mindspore/ops/auto_generate/gen_ops_prim.py +16976 -6055
  272. mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
  273. mindspore/ops/composite/__init__.py +2 -1
  274. mindspore/ops/composite/base.py +20 -25
  275. mindspore/ops/composite/math_ops.py +6 -16
  276. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  277. mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
  278. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  279. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  280. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  281. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  282. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  283. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  284. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  285. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  286. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  287. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  288. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  289. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  290. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  291. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  292. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  293. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  294. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  295. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  296. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  297. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  298. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  299. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  300. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  301. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  302. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  303. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
  304. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  305. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  306. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  307. mindspore/ops/function/__init__.py +40 -2
  308. mindspore/ops/function/_add_attr_func.py +58 -0
  309. mindspore/ops/function/array_func.py +2089 -2403
  310. mindspore/ops/function/clip_func.py +80 -23
  311. mindspore/ops/function/debug_func.py +57 -57
  312. mindspore/ops/function/grad/__init__.py +1 -0
  313. mindspore/ops/function/grad/grad_func.py +104 -71
  314. mindspore/ops/function/image_func.py +2 -2
  315. mindspore/ops/function/linalg_func.py +47 -78
  316. mindspore/ops/function/math_func.py +4351 -3813
  317. mindspore/ops/function/nn_func.py +1712 -637
  318. mindspore/ops/function/other_func.py +159 -1
  319. mindspore/ops/function/parameter_func.py +18 -84
  320. mindspore/ops/function/random_func.py +452 -387
  321. mindspore/ops/function/reshard_func.py +4 -70
  322. mindspore/ops/function/sparse_func.py +3 -3
  323. mindspore/ops/function/sparse_unary_func.py +6 -6
  324. mindspore/ops/function/spectral_func.py +25 -58
  325. mindspore/ops/function/vmap_func.py +26 -18
  326. mindspore/ops/functional.py +23 -7
  327. mindspore/ops/functional_overload.py +1548 -0
  328. mindspore/ops/op_info_register.py +32 -244
  329. mindspore/ops/operations/__init__.py +23 -15
  330. mindspore/ops/operations/_custom_ops_utils.py +235 -0
  331. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  332. mindspore/ops/operations/_grad_ops.py +2 -43
  333. mindspore/ops/operations/_infer_ops.py +2 -1
  334. mindspore/ops/operations/_inner_ops.py +43 -84
  335. mindspore/ops/operations/_ms_kernel.py +4 -10
  336. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  337. mindspore/ops/operations/_scalar_ops.py +3 -2
  338. mindspore/ops/operations/_sequence_ops.py +1 -1
  339. mindspore/ops/operations/_tensor_array.py +1 -1
  340. mindspore/ops/operations/array_ops.py +81 -324
  341. mindspore/ops/operations/comm_ops.py +154 -108
  342. mindspore/ops/operations/custom_ops.py +298 -87
  343. mindspore/ops/operations/debug_ops.py +157 -59
  344. mindspore/ops/operations/inner_ops.py +7 -5
  345. mindspore/ops/operations/linalg_ops.py +1 -57
  346. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  347. mindspore/ops/operations/manually_defined/ops_def.py +928 -180
  348. mindspore/ops/operations/math_ops.py +32 -234
  349. mindspore/ops/operations/nn_ops.py +212 -531
  350. mindspore/ops/operations/other_ops.py +62 -9
  351. mindspore/ops/operations/random_ops.py +13 -7
  352. mindspore/ops/operations/reshard_ops.py +1 -1
  353. mindspore/ops/operations/sparse_ops.py +2 -2
  354. mindspore/ops/primitive.py +66 -53
  355. mindspore/ops/tensor_method.py +1895 -0
  356. mindspore/ops_generate/__init__.py +0 -5
  357. mindspore/ops_generate/aclnn/__init__.py +0 -0
  358. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
  359. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
  360. mindspore/ops_generate/api/__init__.py +0 -0
  361. mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
  362. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
  363. mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
  364. mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
  365. mindspore/ops_generate/api/functions_cc_generator.py +237 -0
  366. mindspore/ops_generate/api/gen_api.py +103 -0
  367. mindspore/ops_generate/api/op_api_proto.py +235 -0
  368. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
  369. mindspore/ops_generate/common/__init__.py +0 -0
  370. mindspore/ops_generate/common/base_generator.py +11 -0
  371. mindspore/ops_generate/common/gen_constants.py +91 -0
  372. mindspore/ops_generate/common/gen_utils.py +348 -0
  373. mindspore/ops_generate/common/op_proto.py +473 -0
  374. mindspore/ops_generate/common/template.py +523 -0
  375. mindspore/ops_generate/gen_ops.py +22 -1069
  376. mindspore/ops_generate/op_def/__init__.py +0 -0
  377. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  378. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
  379. mindspore/ops_generate/op_def/ops_def_cc_generator.py +296 -0
  380. mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
  381. mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
  382. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  383. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  384. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  385. mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
  386. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
  387. mindspore/ops_generate/pyboost/__init__.py +0 -0
  388. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
  389. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
  390. mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
  391. mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
  392. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
  393. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
  394. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
  395. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
  396. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
  397. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
  398. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
  399. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
  400. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
  401. mindspore/ops_generate/resources/__init__.py +0 -0
  402. mindspore/ops_generate/resources/resource_list.py +30 -0
  403. mindspore/ops_generate/resources/resource_loader.py +36 -0
  404. mindspore/ops_generate/resources/resource_manager.py +64 -0
  405. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  406. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  407. mindspore/parallel/__init__.py +7 -3
  408. mindspore/parallel/_auto_parallel_context.py +159 -40
  409. mindspore/parallel/_cell_wrapper.py +132 -15
  410. mindspore/parallel/_parallel_serialization.py +107 -5
  411. mindspore/parallel/_ps_context.py +1 -1
  412. mindspore/parallel/_recovery_context.py +7 -2
  413. mindspore/parallel/_tensor.py +142 -18
  414. mindspore/parallel/_utils.py +199 -23
  415. mindspore/parallel/algo_parameter_config.py +4 -4
  416. mindspore/parallel/auto_parallel.py +732 -0
  417. mindspore/parallel/checkpoint_convert.py +159 -0
  418. mindspore/parallel/checkpoint_transform.py +700 -35
  419. mindspore/parallel/cluster/process_entity/_api.py +276 -50
  420. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  421. mindspore/parallel/cluster/run.py +21 -4
  422. mindspore/parallel/function/__init__.py +24 -0
  423. mindspore/parallel/function/reshard_func.py +258 -0
  424. mindspore/parallel/nn/__init__.py +25 -0
  425. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  426. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  427. mindspore/parallel/parameter_broadcast.py +25 -14
  428. mindspore/parallel/shard.py +137 -59
  429. mindspore/parallel/transform_safetensors.py +364 -305
  430. mindspore/profiler/__init__.py +22 -5
  431. mindspore/profiler/analysis/__init__.py +0 -0
  432. mindspore/profiler/analysis/parser/__init__.py +0 -0
  433. mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
  434. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  435. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  436. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  437. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  438. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  439. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
  440. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  441. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +109 -0
  442. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  443. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  444. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  445. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  446. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  447. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  448. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  449. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  450. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  451. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  452. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
  453. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  454. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  455. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  456. mindspore/profiler/analysis/task_manager.py +131 -0
  457. mindspore/profiler/analysis/time_converter.py +84 -0
  458. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  459. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
  460. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  461. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
  462. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
  463. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
  464. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
  465. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  466. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  467. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
  468. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  469. mindspore/profiler/analysis/work_flow.py +73 -0
  470. mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
  471. mindspore/profiler/common/command_executor.py +90 -0
  472. mindspore/profiler/common/constant.py +186 -3
  473. mindspore/profiler/common/file_manager.py +208 -0
  474. mindspore/profiler/common/log.py +130 -0
  475. mindspore/profiler/common/msprof_cmd_tool.py +221 -0
  476. mindspore/profiler/common/path_manager.py +395 -0
  477. mindspore/profiler/common/process_bar.py +168 -0
  478. mindspore/profiler/common/process_pool.py +9 -3
  479. mindspore/profiler/common/profiler_context.py +500 -0
  480. mindspore/profiler/common/profiler_info.py +304 -0
  481. mindspore/profiler/common/profiler_meta_data.py +74 -0
  482. mindspore/profiler/common/profiler_output_path.py +284 -0
  483. mindspore/profiler/common/profiler_parameters.py +251 -0
  484. mindspore/profiler/common/profiler_path_manager.py +179 -0
  485. mindspore/profiler/common/record_function.py +76 -0
  486. mindspore/profiler/common/tlv_decoder.py +76 -0
  487. mindspore/profiler/common/util.py +75 -2
  488. mindspore/profiler/dynamic_profiler.py +341 -75
  489. mindspore/profiler/envprofiler.py +163 -0
  490. mindspore/profiler/experimental_config.py +197 -0
  491. mindspore/profiler/mstx.py +242 -0
  492. mindspore/profiler/platform/__init__.py +21 -0
  493. mindspore/profiler/platform/base_profiler.py +40 -0
  494. mindspore/profiler/platform/cpu_profiler.py +124 -0
  495. mindspore/profiler/platform/gpu_profiler.py +74 -0
  496. mindspore/profiler/platform/npu_profiler.py +335 -0
  497. mindspore/profiler/profiler.py +1073 -90
  498. mindspore/profiler/profiler_action_controller.py +187 -0
  499. mindspore/profiler/profiler_interface.py +118 -0
  500. mindspore/profiler/schedule.py +243 -0
  501. mindspore/rewrite/api/node.py +15 -13
  502. mindspore/rewrite/api/symbol_tree.py +2 -3
  503. mindspore/run_check/_check_version.py +27 -20
  504. mindspore/run_check/run_check.py +1 -1
  505. mindspore/runtime/__init__.py +37 -0
  506. mindspore/runtime/device.py +27 -0
  507. mindspore/runtime/event.py +209 -0
  508. mindspore/runtime/executor.py +177 -0
  509. mindspore/runtime/memory.py +416 -0
  510. mindspore/runtime/stream.py +460 -0
  511. mindspore/runtime/thread_bind_core.py +401 -0
  512. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  513. mindspore/swresample-4.dll +0 -0
  514. mindspore/swscale-6.dll +0 -0
  515. mindspore/tinyxml2.dll +0 -0
  516. mindspore/train/__init__.py +8 -8
  517. mindspore/train/_utils.py +96 -27
  518. mindspore/train/amp.py +9 -5
  519. mindspore/train/callback/__init__.py +2 -2
  520. mindspore/train/callback/_callback.py +2 -16
  521. mindspore/train/callback/_checkpoint.py +53 -55
  522. mindspore/train/callback/_cluster_monitor.py +14 -18
  523. mindspore/train/callback/_early_stop.py +1 -1
  524. mindspore/train/callback/_flops_collector.py +103 -68
  525. mindspore/train/callback/_history.py +8 -5
  526. mindspore/train/callback/_lambda_callback.py +2 -2
  527. mindspore/train/callback/_landscape.py +0 -3
  528. mindspore/train/callback/_loss_monitor.py +2 -1
  529. mindspore/train/callback/_on_request_exit.py +6 -5
  530. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  531. mindspore/train/callback/_summary_collector.py +52 -19
  532. mindspore/train/callback/_time_monitor.py +2 -1
  533. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +228 -108
  534. mindspore/train/data_sink.py +25 -2
  535. mindspore/train/dataset_helper.py +15 -16
  536. mindspore/train/loss_scale_manager.py +8 -7
  537. mindspore/train/metrics/accuracy.py +3 -3
  538. mindspore/train/metrics/confusion_matrix.py +9 -9
  539. mindspore/train/metrics/error.py +3 -3
  540. mindspore/train/metrics/hausdorff_distance.py +4 -4
  541. mindspore/train/metrics/mean_surface_distance.py +3 -3
  542. mindspore/train/metrics/metric.py +0 -12
  543. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  544. mindspore/train/metrics/precision.py +11 -10
  545. mindspore/train/metrics/recall.py +9 -9
  546. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  547. mindspore/train/mind_ir_pb2.py +174 -46
  548. mindspore/train/model.py +269 -136
  549. mindspore/train/serialization.py +622 -978
  550. mindspore/train/summary/_summary_adapter.py +2 -2
  551. mindspore/train/summary/summary_record.py +2 -3
  552. mindspore/train/train_thor/model_thor.py +1 -1
  553. mindspore/turbojpeg.dll +0 -0
  554. mindspore/utils/__init__.py +6 -3
  555. mindspore/utils/dryrun.py +140 -0
  556. mindspore/utils/hooks.py +81 -0
  557. mindspore/utils/runtime_execution_order_check.py +552 -0
  558. mindspore/utils/utils.py +138 -4
  559. mindspore/version.py +1 -1
  560. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/METADATA +3 -3
  561. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/RECORD +564 -395
  562. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/entry_points.txt +1 -1
  563. mindspore/_install_custom.py +0 -43
  564. mindspore/common/_register_for_adapter.py +0 -74
  565. mindspore/common/_tensor_overload.py +0 -139
  566. mindspore/mindspore_np_dtype.dll +0 -0
  567. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  568. mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
  569. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  570. mindspore/ops_generate/gen_aclnn_implement.py +0 -263
  571. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  572. mindspore/ops_generate/gen_pyboost_func.py +0 -1052
  573. mindspore/ops_generate/gen_utils.py +0 -209
  574. mindspore/ops_generate/op_proto.py +0 -145
  575. mindspore/ops_generate/template.py +0 -261
  576. mindspore/profiler/envprofiling.py +0 -254
  577. mindspore/profiler/profiling.py +0 -1926
  578. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/WHEEL +0 -0
  579. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/top_level.txt +0 -0
@@ -26,10 +26,10 @@ from mindspore.ops import functional as F
26
26
  from mindspore.ops.operations import nn_ops as NN_OPS
27
27
  from mindspore.ops.operations import _sequence_ops as seq
28
28
  import mindspore.common.dtype as mstype
29
- from mindspore.ops.function.math_func import logsumexp
29
+ from mindspore.ops.function.math_func import logsumexp, div
30
30
  from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
31
31
  from mindspore.common.tensor import Tensor
32
- from mindspore._c_expression import Tensor as Tensor_
32
+ from mindspore._c_expression import TensorPy as Tensor_
33
33
  from mindspore.ops._primitive_cache import _get_cache_prim
34
34
  from mindspore import _checkparam as validator
35
35
  from mindspore.ops.composite.multitype_ops._constexpr_utils import raise_value_error
@@ -40,22 +40,72 @@ from mindspore.ops.operations.nn_ops import ChannelShuffle
40
40
  from mindspore.ops.operations.nn_ops import TripletMarginLoss
41
41
  from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
42
42
  from mindspore.common.api import _function_forbid_reuse
43
- from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, relu, fast_gelu, silu, elu, sigmoid, relu6, \
44
- softmax_impl, swiglu, logsigmoid_op
45
- from mindspore.ops.auto_generate import group_norm_op, rms_norm, layer_norm_ext_op, batch_norm_ext_op, mse_loss_ext
43
+ from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, fast_gelu, silu, elu, sigmoid, relu6, \
44
+ softmax_impl, swiglu, logsigmoid_op, kl_div_op, divs_op
45
+ from mindspore.ops.auto_generate import relu_op, inplace_relu_op
46
+ from mindspore.ops.auto_generate import group_norm_op, rms_norm, add_rms_norm, layer_norm_ext_op, batch_norm_ext_op,\
47
+ mse_loss_ext
48
+ # 1
46
49
  from mindspore.ops.auto_generate import (reflection_pad_1d_op, reflection_pad_2d_op, add_layernorm_v2_op,
47
50
  reflection_pad_3d_op, # pylint: disable=W0611
48
51
  replication_pad_1d_op, replication_pad_2d_op, replication_pad_3d_op,
49
52
  constant_pad_nd_op, dropout_ext_op, reverse_v2_impl, avg_pool2d_op,
50
53
  upsample_nearest1d_op, upsample_nearest2d_op, upsample_nearest3d_op,
51
54
  upsample_linear1d_op, upsample_bilinear2d_op, upsample_bicubic2d_op,
52
- upsample_trilinear3d_impl, fill_scalar_op, floor_op)
53
- from mindspore.ops.auto_generate.gen_ops_prim import embedding_op, Convolution, ConstantPadND, MaxPoolWithIndices, \
54
- MaxPoolWithMask
55
+ upsample_trilinear3d_impl, fill_scalar_op, floor_op, nllloss_2d_op,
56
+ masked_fill_op, masked_select, ones, flatten_ext, conv_transpose2d)
57
+ # 2
58
+
59
+ # 3
60
+
61
+ # 4
62
+
63
+ # 5
64
+
65
+ # 6
66
+
67
+ # 7
68
+
69
+ # 8
70
+
71
+ # 9
72
+
73
+ # 10
74
+
75
+ # 11
76
+
77
+ # 12
78
+
79
+ # 13
80
+
81
+ # 14
82
+
83
+ # 15
84
+ from mindspore.ops.auto_generate import avg_pool3d_ext_op
85
+ # 16
86
+
87
+ # 17
88
+
89
+ # 18
90
+
91
+ # 19
92
+
93
+ # 20
94
+
95
+ from mindspore.ops.auto_generate.gen_ops_prim import embedding_op, MaxPoolWithIndices, \
96
+ PromptFlashAttention, MaxPoolWithMask
97
+ from mindspore.ops.auto_generate.gen_ops_prim import conv3d_ext_op, conv3d_padding_op, conv2d_ext_op, \
98
+ conv2d_padding_op, conv1d_ext_op, conv1d_padding_op, speed_fusion_attention_op
55
99
  from mindspore.common.generator import default_generator
56
100
  from mindspore.ops.auto_generate import hardshrink, hardsigmoid, hardswish
57
101
  from mindspore.ops.auto_generate import softshrink
102
+ from mindspore.ops.auto_generate import soft_margin_loss
103
+ from mindspore.ops.auto_generate import moe_token_permute, moe_token_unpermute
58
104
  from mindspore.ops.auto_generate import adaptive_avg_pool2d_ext_op
105
+ from mindspore.ops.auto_generate.pyboost_inner_prim import nllloss_impl
106
+ from mindspore.ops.auto_generate.pyboost_inner_prim import adaptive_max_pool2d_impl
107
+ from mindspore.ops.function.array_func import gather_ext
108
+ from mindspore.ops.operations.manually_defined import flash_attention_score, fused_infer_attention_score
59
109
 
60
110
  abs_ = P.Abs()
61
111
  add_ = P.Add()
@@ -111,7 +161,7 @@ check_int_const = validator.check_is_int
111
161
  check_non_negative_float_const = validator.check_non_negative_float
112
162
  check_string_const = constexpr(validator.check_string)
113
163
 
114
- generator_step_ = Tensor(1, mstype.int64)
164
+ generator_step_ = Tensor(12, mstype.int64)
115
165
 
116
166
 
117
167
  def adaptive_avg_pool2d(input, output_size):
@@ -153,11 +203,11 @@ def adaptive_avg_pool2d(input, output_size):
153
203
  .. math::
154
204
 
155
205
  out\_shape = \begin{cases}
156
- input\_shape[-2] + output\_size[1], & \text{if } output\_size text{ is (None, w);}\\
157
- output\_size[0] + input\_shape[-1], & \text{if } output\_size text{ is (h, None);}\\
158
- input\_shape[-2:], & \text{if } output\_size text{ is (None, None);}\\
159
- (h, h), & \text{if } output\_size text{ is h;}\\
160
- (h, w), & \text{if } output\_size text{ is (h, w)}
206
+ input\_shape[-2] + output\_size[1], & \text{if } output\_size \text{ is (None, w);}\\
207
+ output\_size[0] + input\_shape[-1], & \text{if } output\_size \text{ is (h, None);}\\
208
+ input\_shape[-2:], & \text{if } output\_size \text{ is (None, None);}\\
209
+ (h, h), & \text{if } output\_size \text{ is h;}\\
210
+ (h, w), & \text{if } output\_size \text{ is (h, w)}
161
211
  \end{cases}
162
212
 
163
213
  Raises:
@@ -247,11 +297,11 @@ def adaptive_avg_pool2d_ext(input, output_size):
247
297
  .. math::
248
298
 
249
299
  out\_shape = \begin{cases}
250
- input\_shape[-2] + output\_size[1], & \text{if } output\_size text{ is (None, w);}\\
251
- output\_size[0] + input\_shape[-1], & \text{if } output\_size text{ is (h, None);}\\
252
- input\_shape[-2:], & \text{if } output\_size text{ is (None, None);}\\
253
- (h, h), & \text{if } output\_size text{ is h;}\\
254
- (h, w), & \text{if } output\_size text{ is (h, w)}
300
+ input\_shape[-2] + output\_size[1], & \text{if } output\_size \text{ is (None, w);}\\
301
+ output\_size[0] + input\_shape[-1], & \text{if } output\_size \text{ is (h, None);}\\
302
+ input\_shape[-2:], & \text{if } output\_size \text{ is (None, None);}\\
303
+ (h, h), & \text{if } output\_size \text{ is h;}\\
304
+ (h, w), & \text{if } output\_size \text{ is (h, w)}
255
305
  \end{cases}
256
306
 
257
307
  Raises:
@@ -399,13 +449,15 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
399
449
 
400
450
  Args:
401
451
  input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
402
- kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
403
- stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
452
+ kernel_size (int, optional): The size of kernel window used to take the average value. Default: ``1`` .
453
+ stride (Union(int, tuple[int]), optional): The distance of kernel moving. `stride` can either be an int
404
454
  number or a tuple of one int number. Default: ``1`` .
405
- padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
455
+ padding (Union(int, tuple[int]), optional): The pad value to be filled. `padding` can either be an integer
406
456
  or a tuple of one integer. Default: ``0`` .
407
- ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
408
- count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
457
+ ceil_mode (bool, optional): If True, apply ceil instead of floor to compute the output shape.
458
+ Default: ``False``.
459
+ count_include_pad (bool, optional): If True, include the zero-padding in the averaging calculation.
460
+ Default: ``True`` .
409
461
 
410
462
  Returns:
411
463
  Tensor of shape :math:`(N, C_{out}, L_{out})`.
@@ -620,13 +672,13 @@ def avg_pool2d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False,
620
672
  outputs regional average in the :math:`(H_{in}, W_{in})` -dimension.
621
673
  Given kernel size :math:`(kH, kW)` and `stride` , the operation is as follows.
622
674
 
623
- .. note::
624
- On the Atlas platform, when calculating the input, the precision is degraded from float32 to float16.
625
-
626
675
  .. math::
627
676
  \text{output}(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
628
677
  \text{input}(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
629
678
 
679
+ .. note::
680
+ On the Atlas platform, when calculating the input, the precision is degraded from float32 to float16.
681
+
630
682
  Args:
631
683
  input (Tensor): Tensor of shape :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
632
684
  kernel_size (Union[int, tuple[int], list[int]]): The size of kernel used to take the average value.
@@ -779,6 +831,77 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
779
831
  return avg_pool_op(input_x)
780
832
 
781
833
 
834
+ def avg_pool3d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True,
835
+ divisor_override=None):
836
+ r"""
837
+ Applies a 3D average pooling over an input Tensor which can be regarded as a composition of
838
+ 3D input planes. Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` ,
839
+ outputs regional average in the :math:`(D_{in}, H_{in}, W_{in})` -dimension.
840
+ Given kernel size :math:`(kD, kH, kW)` and `stride` , the operation is as follows.
841
+
842
+ .. math::
843
+ \text{output}(N_i, C_j, d, h, w) = \frac{1}{kD * kH * kW} \sum_{l=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
844
+
845
+ \text{input}(N_i, C_j, stride[0] \times d + l, stride[1] \times h + m, stride[2] \times w + n)
846
+
847
+ .. warning::
848
+ This is an experimental API that is subject to change or deletion.
849
+
850
+ Note:
851
+ This interface currently does not support Atlas A2 training series products.
852
+
853
+ Args:
854
+ input (Tensor): Tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
855
+ kernel_size (Union[int, tuple[int], list[int]]): The size of kernel used to take the average value.
856
+ Can be a single number or a tuple :math:`(kD, kH, kW)` .
857
+ stride (Union[int, tuple[int], list[int]], optional): The distance of kernel moving.
858
+ Can be a single number or a tuple :math:`(sD, sH, sW)` . Default: ``None``,
859
+ where its value is equal to `kernel_size`.
860
+ padding (Union[int, tuple[int], list[int]], optional): Implicit zero padding to be added on both sides.
861
+ Can be a single number or a tuple :math:`(padD, padH, padW)` . Default: ``0``.
862
+ ceil_mode (bool, optional): If True, apply ceil instead of floor to compute the output shape.
863
+ Default: ``False``.
864
+ count_include_pad (bool, optional): If True, include the zero-padding in the averaging calculation.
865
+ Default: ``True`` .
866
+ divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
867
+ otherwise size of pooling region will be used. Default: ``None``.
868
+
869
+ Returns:
870
+ Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`.
871
+
872
+ .. math::
873
+ \begin{array}{ll} \\
874
+ D_{out} = \frac{D_{in} + 2 \times padding[0] - kernel\_size[0]}{stride[0]} + 1 \\
875
+ H_{out} = \frac{H_{in} + 2 \times padding[1] - kernel\_size[1]}{stride[1]} + 1 \\
876
+ W_{out} = \frac{W_{in} + 2 \times padding[2] - kernel\_size[2]}{stride[2]} + 1
877
+ \end{array}
878
+
879
+ Raises:
880
+ TypeError: If `input` is not a Tensor.
881
+ TypeError: If `kernel_size` or `stride` is neither int nor tuple.
882
+ TypeError: If `ceil_mode` or `count_include_pad` is not a bool.
883
+ TypeError: If `divisor_override` is not an int or None.
884
+ ValueError: If the dimension of `input` is not equal to `4` or `5`.
885
+ ValueError: If `kernel_size` or `stride` is less than 1.
886
+ ValueError: If value of `padding` is less than `0`.
887
+ ValueError: If `kernel_size`, `padding` or `stride` is a tuple whose length is not equal to `1` or `3`.
888
+
889
+ Supported Platforms:
890
+ ``Ascend``
891
+
892
+ Examples:
893
+ >>> import mindspore
894
+ >>> import numpy as np
895
+ >>> from mindspore import Tensor, ops
896
+ >>> input_x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float16)
897
+ >>> output = ops.avg_pool3d_ext(input_x, kernel_size=2, stride=1)
898
+ >>> print(output)
899
+ [[[[[ 5. 6.]]]
900
+ [[[17. 18.]]]]]
901
+ """
902
+ return avg_pool3d_ext_op(input, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
903
+
904
+
782
905
  @constexpr
783
906
  def is_ascend_backend():
784
907
  """Check if the Ascend is used"""
@@ -898,7 +1021,7 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
898
1021
  \end{align}
899
1022
 
900
1023
  Note:
901
- Ascend platform only supports float16 type for input.
1024
+ In KBK mode, `output_size` does not support mutable.
902
1025
 
903
1026
  Args:
904
1027
  input (Tensor): A 3D or 4D tensor,
@@ -907,7 +1030,7 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
907
1030
  or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
908
1031
  If it is None, it means the output size is the same as the input size.
909
1032
 
910
- return_indices (bool): If `return_indices` is ``True`` , the indices of max value would be output.
1033
+ return_indices (bool, optional): If `return_indices` is ``True`` , the indices of max value would be output.
911
1034
  Default: ``False`` .
912
1035
 
913
1036
  Returns:
@@ -959,11 +1082,17 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
959
1082
  [[8. 9.]]
960
1083
  [[8. 9.]]]]
961
1084
  """
1085
+ output_size_ = None
962
1086
  _check_adaptive_max_pool2d(return_indices)
963
- _adaptive_max_pool2d = _get_cache_prim(NN_OPS.AdaptiveMaxPool2D)(output_size)
964
- out = _adaptive_max_pool2d(input)
965
- output = out if return_indices else out[0]
966
- return output
1087
+
1088
+ if isinstance(output_size, int):
1089
+ output_size_ = (output_size, output_size)
1090
+ else:
1091
+ output_size_ = tuple(-1 if val is None else val for val in output_size)
1092
+
1093
+ if return_indices:
1094
+ return adaptive_max_pool2d_impl(input, output_size_)
1095
+ return adaptive_max_pool2d_impl(input, output_size_)[0]
967
1096
 
968
1097
 
969
1098
  def adaptive_max_pool3d(input, output_size, return_indices=False):
@@ -1438,7 +1567,7 @@ def dropout(input, p=0.5, training=True, seed=None):
1438
1567
  input (Tensor): The input Tensor of shape :math:`(*, N)`, with data type of float16, float32 or float64.
1439
1568
  p (float, optional): The dropping rate, between 0 and 1, e.g. p = 0.1,
1440
1569
  means dropping out 10% of input units. Default: ``0.5`` .
1441
- training (bool): Apply dropout if is True. Default: ``True``.
1570
+ training (bool, optional): Apply dropout if it is True. Default: ``True``.
1442
1571
  seed (int, optional): Seed is used as entropy source for Random number engines generating pseudo-random numbers.
1443
1572
  Default: ``None`` , which will be treated as ``0`` .
1444
1573
 
@@ -1473,7 +1602,7 @@ def dropout(input, p=0.5, training=True, seed=None):
1473
1602
 
1474
1603
 
1475
1604
  @_function_forbid_reuse
1476
- def dropout_ext(input, p=0.5, training=True):
1605
+ def dropout_ext(input, p=0.5, training=True, inplace=False):
1477
1606
  r"""
1478
1607
  During training, randomly zeroes some of the elements of the input tensor
1479
1608
  with probability `p` from a Bernoulli distribution. It plays the role of reducing neuron correlation and
@@ -1482,10 +1611,12 @@ def dropout_ext(input, p=0.5, training=True):
1482
1611
 
1483
1612
  Args:
1484
1613
  input (Tensor): The input Tensor of shape :math:`(*, N)`.
1485
- p (float): The dropping rate of input neurons, between 0 and 1, e.g. `p` = 0.1,
1614
+ p (float, optional): The dropping rate of input neurons, between 0 and 1, e.g. `p` = 0.1,
1486
1615
  means dropping out 10% of input neurons. Default: ``0.5`` .
1487
- training (bool): Apply dropout if it is ``True`` , if it is ``False`` , the input is returned directly,
1488
- and `p` is invalid. Default: ``True``.
1616
+ training (bool, optional): Apply dropout if it is ``True`` ,
1617
+ if it is ``False`` , the input is returned directly,
1618
+ and `p` is invalid. Default: ``True`` .
1619
+ inplace (bool, optional): If set to ``True`` , will do this operation in-place. Default: ``False`` .
1489
1620
 
1490
1621
  Returns:
1491
1622
  - **output** (Tensor) - Zeroed tensor, with the same shape and data type as `input`.
@@ -1506,10 +1637,14 @@ def dropout_ext(input, p=0.5, training=True):
1506
1637
  (2, 2)
1507
1638
  """
1508
1639
  check_bool_const(training, "training", "dropout_ext")
1509
- if training is False:
1640
+ check_bool_const(inplace, "inplace", "dropout_ext")
1641
+ if not training:
1510
1642
  return input
1511
1643
  seed, offset = default_generator._step(generator_step_) # pylint: disable=protected-access
1512
1644
  out, _ = dropout_ext_op(input, p, seed, offset)
1645
+ if inplace:
1646
+ input.copy_(out)
1647
+ return input
1513
1648
  return out
1514
1649
 
1515
1650
 
@@ -1610,7 +1745,7 @@ def dropout2d(input, p=0.5, training=True):
1610
1745
  input (Tensor): A `4D` tensor with shape :math:`(N, C, H, W)`, where `N` is the batch size, `C` is the number
1611
1746
  of channels, `H` is the feature height, and `W` is the feature width. The data type must be int8,
1612
1747
  int16, int32, int64, float16, float32 or float64.
1613
- p (float): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
1748
+ p (float): The dropping probability of a channel. The range is [0.0, 1.0], e.g. `p` = 0.8,
1614
1749
  which means dropping out 80% of channels. Default: ``0.5`` .
1615
1750
  training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: ``True`` .
1616
1751
 
@@ -2022,6 +2157,75 @@ def kl_div(logits, labels, reduction='mean'):
2022
2157
  return _get_cache_prim(P.KLDivLoss)(reduction=reduction)(logits, labels)
2023
2158
 
2024
2159
 
2160
+ def kl_div_ext(input, target, reduction='mean', log_target=False):
2161
+ r"""
2162
+ Computes the Kullback-Leibler divergence between the `input` and the `target`.
2163
+
2164
+ For tensors of the same shape :math:`x` and :math:`y`,
2165
+ the updating formulas of KLDivLoss algorithm are as follows,
2166
+
2167
+ .. math::
2168
+ L(x, y) = y \cdot (\log y - x)
2169
+
2170
+ Then,
2171
+
2172
+ .. math::
2173
+ \ell(x, y) = \begin{cases}
2174
+ L(x, y), & \text{if reduction} = \text{'none';}\\
2175
+ \operatorname{mean}(L(x, y)), & \text{if reduction} = \text{'mean';}\\
2176
+ \operatorname{sum}(L(x, y)) / x.\operatorname{shape}[0], & \text{if reduction} = \text{'batchmean';}\\
2177
+ \operatorname{sum}(L(x, y)), & \text{if reduction} = \text{'sum'.}
2178
+ \end{cases}
2179
+
2180
+ where :math:`x` represents `input`, :math:`y` represents `target`, and :math:`\ell(x, y)` represents the output.
2181
+
2182
+ Note:
2183
+ The output aligns with the mathematical definition of Kullback-Leibler divergence
2184
+ only when `reduction` is set to ``'batchmean'``.
2185
+
2186
+ Args:
2187
+ input (Tensor): The input Tensor. The data type must be float16, float32 or bfloat16(only supported by Atlas A2
2188
+ training series products).
2189
+ target (Tensor): The target Tensor which has the same type as `input`. The shapes of `target` and `input`
2190
+ should be broadcastable.
2191
+ reduction (str, optional): Specifies the reduction to be applied to the output. Default: ``'mean'``.
2192
+ log_target (bool, optional): Specifies whether `target` is passed in the log space. Default: ``False``.
2193
+
2194
+ Returns:
2195
+ Tensor, has the same dtype as `input`. If `reduction` is ``'none'``, then output has the shape as broadcast
2196
+ result of the `input` and `target`. Otherwise, it is a scalar Tensor.
2197
+
2198
+ Raises:
2199
+ TypeError: If either `input` or `target` is not a Tensor.
2200
+ TypeError: If dtype of `input` or `target` is not float16, float32 or bfloat16.
2201
+ TypeError: If dtype of `target` is not the same as `input`.
2202
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``, ``'batchmean'``.
2203
+ ValueError: If shapes of `target` and `input` can not be broadcastable.
2204
+
2205
+ Supported Platforms:
2206
+ ``Ascend``
2207
+
2208
+ Examples:
2209
+ >>> import mindspore as ms
2210
+ >>> from mindspore import ops
2211
+ >>> import numpy as np
2212
+ >>> input = ms.Tensor(np.array([[0.5, 0.5], [0.4, 0.6]]), ms.float32)
2213
+ >>> target = ms.Tensor(np.array([[0., 1.], [1., 0.]]), ms.float32)
2214
+ >>> output = ops.kl_div_ext(input, target, reduction='mean', log_target=False)
2215
+ >>> print(output)
2216
+ -0.225
2217
+ """
2218
+ if reduction == 'batchmean':
2219
+ reduced = kl_div_op(input, target, 'sum', log_target)
2220
+ else:
2221
+ reduced = kl_div_op(input, target, reduction, log_target)
2222
+
2223
+ if reduction == 'batchmean' and input.ndim != 0:
2224
+ reduced = divs_op(reduced, input.shape[0])
2225
+
2226
+ return reduced
2227
+
2228
+
2025
2229
  @constexpr
2026
2230
  def _check_axis_in_range(axis, ndim):
2027
2231
  """Checks axes are with the bounds of ndim"""
@@ -2081,33 +2285,22 @@ def _check_input_tensor(arg_name, *tensors):
2081
2285
 
2082
2286
  def flip(input, dims):
2083
2287
  """
2084
- Reverses the order of elements in a tensor along the given axis.
2085
-
2086
- The shape of the tensor is preserved, but the elements are reordered.
2288
+ Reverses elements in a tensor along the given dims.
2087
2289
 
2088
2290
  Args:
2089
- input (Tensor): Input tensor.
2090
- dims (Union[list[int], tuple[int]]): Axis or axes along which to flip over.
2091
- Flipping is performed on all of the axes specified in the tuple,
2092
- If `dims` is a tuple of integers contains negative, it counts from the last to the first axis.
2291
+ input (Tensor): The input tensor.
2292
+ dims (Union[list[int], tuple[int]]): The dimension to flip.
2093
2293
 
2094
2294
  Returns:
2095
- Tensor, with the entries of `dims` reversed.
2096
-
2097
- Raises:
2098
- TypeError: If the input is not a tensor.
2099
- ValueError: If `dims` is None.
2100
- ValueError: If `dims` is not a list/tuple of ints.
2295
+ Tensor
2101
2296
 
2102
2297
  Supported Platforms:
2103
2298
  ``Ascend`` ``GPU`` ``CPU``
2104
2299
 
2105
2300
  Examples:
2106
2301
  >>> import mindspore
2107
- >>> from mindspore import ops
2108
- >>> import numpy as np
2109
- >>> input = mindspore.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2110
- >>> output = ops.flip(input, (0, 2))
2302
+ >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
2303
+ >>> output = mindspore.ops.flip(input, (0, 2))
2111
2304
  >>> print(output)
2112
2305
  [[[6 5]
2113
2306
  [8 7]]
@@ -2120,26 +2313,21 @@ def flip(input, dims):
2120
2313
 
2121
2314
  def flipud(input):
2122
2315
  """
2123
- Flips the elements of each column in the up/down direction, while preserving the rows of the input tensor.
2316
+ Flip the input tensor in up/down direction.
2124
2317
 
2125
2318
  Args:
2126
- input (Tensor): Input array.
2319
+ input (Tensor): The input tensor, the dimension must be at least 2.
2127
2320
 
2128
2321
  Returns:
2129
- Tensor after the flip.
2130
-
2131
- Raises:
2132
- TypeError: If the input is not a tensor.
2322
+ Tensor
2133
2323
 
2134
2324
  Supported Platforms:
2135
2325
  ``Ascend`` ``GPU`` ``CPU``
2136
2326
 
2137
2327
  Examples:
2138
- >>> import mindspore as ms
2139
- >>> from mindspore import ops
2140
- >>> import numpy as np
2141
- >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2142
- >>> output = ops.flipud(input)
2328
+ >>> import mindspore
2329
+ >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
2330
+ >>> output = mindspore.ops.flipud(input)
2143
2331
  >>> print(output)
2144
2332
  [[[5 6]
2145
2333
  [7 8]]
@@ -2151,26 +2339,21 @@ def flipud(input):
2151
2339
 
2152
2340
  def fliplr(input):
2153
2341
  """
2154
- Flips the elements of each row in the left/right direction, while preserving the columns of the input tensor.
2342
+ Flip the input tensor in left/right direction.
2155
2343
 
2156
2344
  Args:
2157
- input (Tensor): Input tensor.
2345
+ input (Tensor): The input tensor, the dimension must be at least 2.
2158
2346
 
2159
2347
  Returns:
2160
- Tensor after the flip.
2161
-
2162
- Raises:
2163
- TypeError: If the input is not a tensor.
2348
+ Tensor
2164
2349
 
2165
2350
  Supported Platforms:
2166
2351
  ``Ascend`` ``GPU`` ``CPU``
2167
2352
 
2168
2353
  Examples:
2169
- >>> import mindspore as ms
2170
- >>> from mindspore import ops
2171
- >>> import numpy as np
2172
- >>> input = ms.Tensor(np.arange(1, 9).reshape((2, 2, 2)))
2173
- >>> output = ops.fliplr(input)
2354
+ >>> import mindspore
2355
+ >>> input = mindspore.tensor(mindspore.ops.arange(1, 9).reshape((2, 2, 2)))
2356
+ >>> output = mindspore.ops.fliplr(input)
2174
2357
  >>> print(output)
2175
2358
  [[[3 4]
2176
2359
  [1 2]]
@@ -2182,29 +2365,33 @@ def fliplr(input):
2182
2365
 
2183
2366
  def is_floating_point(input):
2184
2367
  """
2185
- Judge whether the data type of `input` is a floating point data type i.e., one of mindspore.float64,
2186
- mindspore.float32, mindspore.float16.
2368
+ If the data type of the tensor is a floating point data type, return True. Otherwise return False.
2187
2369
 
2188
2370
  Args:
2189
2371
  input (Tensor): The input Tensor.
2190
2372
 
2191
2373
  Returns:
2192
- Bool. If the dtype of `input` is a floating point data type, return ``True`` . Otherwise, return ``False`` .
2374
+ Bool
2193
2375
 
2194
2376
  Supported Platforms:
2195
2377
  ``Ascend`` ``GPU`` ``CPU``
2196
2378
 
2197
2379
  Examples:
2198
- >>> import mindspore as ms
2199
- >>> from mindspore import ops
2200
- >>> from mindspore import Tensor
2201
- >>> x = ms.Tensor([1, 2, 3], ms.float32)
2202
- >>> y = ms.Tensor([1, 2, 3], ms.int64)
2203
- >>> output = ops.is_floating_point(x)
2204
- >>> output2 = ops.is_floating_point(y)
2205
- >>> print(output)
2380
+ >>> import mindspore
2381
+ >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float64)
2382
+ >>> mindspore.ops.is_floating_point(input)
2206
2383
  True
2207
- >>> print(output2)
2384
+ >>>
2385
+ >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float32)
2386
+ >>> mindspore.ops.is_floating_point(input)
2387
+ True
2388
+ >>>
2389
+ >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.float16)
2390
+ >>> mindspore.ops.is_floating_point(input)
2391
+ True
2392
+ >>>
2393
+ >>> input = mindspore.tensor([False, 0j, 1, 2.1, 1+2j], mindspore.int32)
2394
+ >>> mindspore.ops.is_floating_point(input)
2208
2395
  False
2209
2396
  """
2210
2397
  return input.dtype in [mstype.float32, mstype.bfloat16, mstype.float16, mstype.float64]
@@ -2326,12 +2513,20 @@ def interpolate(input,
2326
2513
  If scale_factor is a tuple or list, its length should be the same as the number of dimensions in input
2327
2514
  after removing the first two dimensions N, C.
2328
2515
  One and only one of size and scale_factor can be set to None. Default: ``None`` .
2329
- mode (str): The sampling algorithm.
2330
- One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
2331
- 'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
2332
- knows issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
2333
-
2334
- align_corners (bool): Whether to use corner alignment for coordinate mapping. Assuming a transformation is
2516
+ mode (str, optional): The sampling algorithm. Default: ``"nearest"`` .
2517
+ One of the following sampling methods can be used:
2518
+
2519
+ - 'nearest': the nearest neighbours interpolation.
2520
+ - 'linear': Linear interpolation, 3D only.
2521
+ - 'bilinear': Bilinear interpolation, 4D only.
2522
+ - 'trilinear': Trilinear interpolation, 5D only.
2523
+ - 'bicubic': Bicubic interpolation, 4D only.
2524
+ - 'area': area interpolation.
2525
+ - 'nearest-exact': matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
2526
+ known issues with `nearest`, for 3D and 4D.
2527
+
2528
+ align_corners (bool, optional): Whether to use corner alignment for coordinate mapping.
2529
+ Assuming a transformation is
2335
2530
  applied to the input Tensor along the x-axis, the specific calculation formula is as follows:
2336
2531
 
2337
2532
  .. code-block::
@@ -2348,9 +2543,10 @@ def interpolate(input,
2348
2543
 
2349
2544
  This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``False`` .
2350
2545
  recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
2351
- If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2352
- and finally scaled using the value of `size`.
2353
- If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
2546
+
2547
+ - If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2548
+ and finally scaled using the value of `size`.
2549
+ - If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
2354
2550
 
2355
2551
  .. note::
2356
2552
  The 'nearest-exact' mode is the same as the nearest-neighbor interpolation algorithm used in
@@ -2415,8 +2611,8 @@ def interpolate(input,
2415
2611
  >>> input = Tensor([[[1, 2, 3], [4, 5, 6]]], mindspore.float32)
2416
2612
  >>> output = ops.interpolate(input, size=(6,), mode='nearest')
2417
2613
  >>> print(output)
2418
- [[[1. 1. 2. 2. 3. 3.]
2419
- [4. 4. 5. 5. 6. 6.]]]
2614
+ [[[1. 1. 2. 2. 3. 3.]
2615
+ [4. 4. 5. 5. 6. 6.]]]
2420
2616
  """
2421
2617
 
2422
2618
  def run_nearest(x, size, align_corners=None, scale_factor=None):
@@ -2667,7 +2863,7 @@ def interpolate_ext(input,
2667
2863
  r"""
2668
2864
  Samples the input Tensor to the given size or scale_factor by using one of the interpolate algorithms.
2669
2865
 
2670
- .. warnings:
2866
+ .. warning::
2671
2867
  This is an experimental API that is subject to change or deletion.
2672
2868
 
2673
2869
  .. note::
@@ -2675,7 +2871,7 @@ def interpolate_ext(input,
2675
2871
  is not supported.
2676
2872
  - In 'nearest' mode, there may exist precision problem in the scenarios, where input is 3-D/4-D Tensor
2677
2873
  and the image is scaled by scale_factor.
2678
- - `mode` and `scale_factor` should be constants.
2874
+ - `mode` and `recompute_scale_factor` should be constants.
2679
2875
 
2680
2876
  Args:
2681
2877
  input (Tensor): Tensor to be resized.
@@ -2690,9 +2886,11 @@ def interpolate_ext(input,
2690
2886
  after removing the first two dimensions N, C.
2691
2887
  One and only one of size and scale_factor can be set to None. Default: ``None`` .
2692
2888
  mode (str): The sampling algorithm.
2693
- One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), and 'bicubic' (4D only).
2889
+ One of 'nearest', 'linear' (3D only),
2890
+ 'bilinear' (4D only), 'trilinear' (5D only), and 'bicubic' (4D only).
2694
2891
  Default: ``"nearest"`` .
2695
- align_corners (bool): Whether to use corner alignment for coordinate mapping. Assuming a transformation is
2892
+ align_corners (bool, optional): Whether to use corner alignment for coordinate mapping.
2893
+ Assuming a transformation is
2696
2894
  applied to the input Tensor along the x-axis, the specific calculation formula is as follows:
2697
2895
 
2698
2896
  .. code-block::
@@ -2707,7 +2905,7 @@ def interpolate_ext(input,
2707
2905
  the corresponding coordinate of the original
2708
2906
  data along the x-axis.
2709
2907
 
2710
- This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``False`` .
2908
+ This is only valid for ``'linear'``, ``'bilinear'``, or ``'bicubic'`` modes. Default: ``None`` .
2711
2909
  recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
2712
2910
  If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2713
2911
  and finally scaled using the value of `size`.
@@ -2740,20 +2938,6 @@ def interpolate_ext(input,
2740
2938
  Returns:
2741
2939
  Tensor, sampled, whose dimensions and dtype are the same as `input`.
2742
2940
 
2743
- Shape:
2744
- - Input: :math:`(N, C, W_{in})`, :math:`(N, C, H_{in}, W_{in})` or :math:`(N, C, D_{in}, H_{in}, W_{in})`
2745
- - Output: :math:`(N, C, W_{out})`, :math:`(N, C, H_{out}, W_{out})`
2746
- or :math:`(N, C, D_{out}, H_{out}, W_{out})`, where
2747
-
2748
- .. math::
2749
- D_{out} = \left\lfloor D_{in} \times \text{scale\_factor} \right\rfloor
2750
-
2751
- .. math::
2752
- H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor
2753
-
2754
- .. math::
2755
- W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor
2756
-
2757
2941
  Raises:
2758
2942
  TypeError: `input` is not a Tensor.
2759
2943
  ValueError: Both `size` and `scale_factor` are not empty.
@@ -2771,12 +2955,12 @@ def interpolate_ext(input,
2771
2955
 
2772
2956
  Examples:
2773
2957
  >>> import mindspore
2774
- >>> from mindspore import Tensor, mint
2958
+ >>> from mindspore import Tensor, ops
2775
2959
  >>> input = Tensor([[[1, 2, 3], [4, 5, 6]]], mindspore.float32)
2776
- >>> output = mint.interpolate(input, size=(6,), mode='nearest')
2960
+ >>> output = ops.interpolate_ext(input, size=(6,), mode='nearest')
2777
2961
  >>> print(output)
2778
- [[[1. 1. 2. 2. 3. 3.]
2779
- [4. 4. 5. 5. 6. 6.]]]
2962
+ [[[1. 1. 2. 2. 3. 3.]
2963
+ [4. 4. 5. 5. 6. 6.]]]
2780
2964
  """
2781
2965
 
2782
2966
  def run_nearest(x, size, align_corners=None, scale_factor=None):
@@ -2914,58 +3098,6 @@ def softsign(x):
2914
3098
  return softsign_(x)
2915
3099
 
2916
3100
 
2917
- def soft_margin_loss(input, target, reduction='mean'):
2918
- r"""
2919
- Calculate the soft margin loss of input and target.
2920
-
2921
- Creates a criterion that optimizes a two-class classification
2922
- logistic loss between input tensor :math:`x` and target tensor :math:`y`
2923
- (containing 1 or -1).
2924
-
2925
- .. math::
2926
- \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
2927
-
2928
- where :math:`x.nelement()` is the number of elements of :math:`x`.
2929
-
2930
- .. warning::
2931
- This is an experimental API that is subject to change or deletion.
2932
-
2933
- Args:
2934
- input (Tensor): Predict data. Data type must be float16 or float32.
2935
- target (Tensor): Ground truth data, with the same type and shape as `input`.
2936
- reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2937
- ``'sum'`` . Default: ``'mean'`` .
2938
-
2939
- - ``'none'``: no reduction will be applied.
2940
- - ``'mean'``: compute and return the mean of elements in the output.
2941
- - ``'sum'``: the output elements will be summed.
2942
-
2943
- Outputs:
2944
- Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `input`.
2945
- Otherwise, a scalar value will be returned.
2946
-
2947
- Raises:
2948
- TypeError: If `input` or `target` is not a Tensor.
2949
- TypeError: If dtype of `input` or `target` is neither float16 nor float32.
2950
- ValueError: If shape of `input` is not the same as that of `target`.
2951
- ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
2952
-
2953
- Supported Platforms:
2954
- ``Ascend`` ``GPU``
2955
-
2956
- Examples:
2957
- >>> import mindspore
2958
- >>> import numpy as np
2959
- >>> from mindspore import Tensor, ops
2960
- >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
2961
- >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
2962
- >>> output = ops.soft_margin_loss(logits, labels)
2963
- >>> print(output)
2964
- 0.6764238
2965
- """
2966
- soft_margin_loss_op = _get_cache_prim(P.SoftMarginLoss)(reduction=reduction)
2967
- output = soft_margin_loss_op(input, target)
2968
- return output
2969
3101
 
2970
3102
 
2971
3103
  def softmax(input, axis=-1, *, dtype=None):
@@ -3030,8 +3162,6 @@ def softmax_ext(input, dim=None, dtype=None):
3030
3162
  input (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3031
3163
  additional dimensions.
3032
3164
  dim (int, optional): The dim to perform the Softmax operation. Default: ``None`` .
3033
-
3034
- Keyword Args:
3035
3165
  dtype (:class:`mindspore.dtype`, optional): When set, `input` will be converted to the specified type,
3036
3166
  `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
3037
3167
 
@@ -3264,7 +3394,7 @@ def selu(input_x):
3264
3394
 
3265
3395
  def logsigmoid(x):
3266
3396
  r"""
3267
- Applies logsigmoid activation element-wise. The input is a Tensor with any valid shape.
3397
+ Applies LogSigmoid activation element-wise. The input is a Tensor with any valid shape.
3268
3398
 
3269
3399
  Logsigmoid is defined as:
3270
3400
 
@@ -3631,7 +3761,7 @@ def _replication_pad(input, pad):
3631
3761
  return out
3632
3762
 
3633
3763
 
3634
- def pad_ext(input, pad, mode='constant', value=0.0):
3764
+ def pad_ext(input, pad, mode='constant', value=None):
3635
3765
  r"""
3636
3766
  Pads the input tensor according to the pad.
3637
3767
 
@@ -3679,7 +3809,7 @@ def pad_ext(input, pad, mode='constant', value=0.0):
3679
3809
 
3680
3810
  value (Union[int, float, None], optional): Valid only in ``'constant'`` mode.
3681
3811
  Set the padding value in ``'constant'`` mode. If the value is None, 0 is used as the default padding value.
3682
- Default: ``0.0`` .
3812
+ Default: ``None`` .
3683
3813
 
3684
3814
  Returns:
3685
3815
  Tensor, the tensor after padding.
@@ -3689,7 +3819,7 @@ def pad_ext(input, pad, mode='constant', value=0.0):
3689
3819
  TypeError: If `input` is not a Tensor.
3690
3820
  ValueError: If length of `pad` is not even.
3691
3821
  ValueError: If length of `pad` is greater than 6.
3692
- ValueError: If `mode` is not ``'constant'`` and `value` not ``None``.
3822
+ ValueError: If `mode` is not ``'constant'`` and `value` is neither ``None`` nor 0.
3693
3823
 
3694
3824
  Supported Platforms:
3695
3825
  ``Ascend``
@@ -3717,7 +3847,7 @@ def pad_ext(input, pad, mode='constant', value=0.0):
3717
3847
  value = 0 if value is None else value
3718
3848
  out = constant_pad_nd_op(input, pad, value)
3719
3849
  else:
3720
- if value != 0.0:
3850
+ if value is not None and value != 0:
3721
3851
  raise ValueError(f"Padding mode {mode} doesn\'t take in value argument.")
3722
3852
  if mode == "circular":
3723
3853
  out = _circular_pad(input, pad)
@@ -3897,9 +4027,11 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3897
4027
  `Empirical Evaluation of Rectified Activations in Convolution Network <https://arxiv.org/pdf/1505.00853.pdf>`_ .
3898
4028
 
3899
4029
  Args:
3900
- input (Tensor): The input of rrelu is a Tensor of any dimension.
3901
- lower (Union[int, float]): Slope of the activation function at x < 0. Default: ``1.0 / 8`` .
3902
- upper (Union[int, float]): Slope of the activation function at x < 0. Default: ``1.0 / 3`` .
4030
+ input (Tensor): The input of rrelu is a Tensor of any dimension.
4031
+ lower (Union[int, float]): Lower bound of the slope of the activation function when data of `input` is less than 0.
4032
+ Default: ``1.0 / 8`` .
4033
+ upper (Union[int, float]): Upper bound of the slope of the activation function when data of `input` is less than 0.
4034
+ Default: ``1.0 / 3`` .
3903
4035
 
3904
4036
  Returns:
3905
4037
  Tensor, after rrelu, has the same type and shape as the `input`.
@@ -4161,7 +4293,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
4161
4293
  N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
4162
4294
  classes.
4163
4295
 
4164
- If `reduction` is not ``None`` (default ``'mean'``), then
4296
+ If `reduction` is not ``'none'`` (default ``'mean'``), then
4165
4297
 
4166
4298
  .. math::
4167
4299
 
@@ -4271,67 +4403,364 @@ def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, red
4271
4403
  return loss
4272
4404
 
4273
4405
 
4274
- def l1_loss(input, target, reduction='mean'):
4406
+ def nll_loss_ext(input, target, weight=None, ignore_index=-100, reduction='mean'):
4275
4407
  r"""
4276
- Calculate the mean absolute error between the `input` value and the `target` value.
4277
-
4278
- Assuming that the :math:`x` and :math:`y` (predicted and target value) are 1-D Tensor,
4279
- length :math:`N`, `reduction` is set to ``'none'``, then calculate the loss of
4280
- :math:`x` and :math:`y` without dimensionality reduction.
4408
+ Gets the negative log likelihood loss between input and target.
4281
4409
 
4282
- The formula is as follows:
4410
+ The nll loss with reduction=none can be described as:
4283
4411
 
4284
4412
  .. math::
4285
- \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with } l_n = \left| x_n - y_n \right|,
4286
4413
 
4287
- where :math:`N` is the batch size.
4414
+ \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
4415
+ \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
4416
+ \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
4417
+ \{c \not= \text{ignore_index}\},
4288
4418
 
4289
- If `reduction` is ``'mean'`` or ``'sum'`` , then:
4419
+ where :math:`x` is the input, :math:`t` is the target, :math:`w` is the weight,
4420
+ :math:`N` is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index,
4421
+ where :math:`C` is the number of classes.
4422
+
4423
+ If `reduction` is not ``'none'`` (default ``'mean'``), then
4290
4424
 
4291
4425
  .. math::
4292
- \ell(x, y) =
4293
- \begin{cases}
4294
- \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
4295
- \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
4296
- \end{cases}
4426
+
4427
+ \ell(x, t)=\left\{\begin{array}{ll}
4428
+ \sum_{n=1}^{N} \frac{1}{\sum_{n=1}^{N} w_{t_{n}}} l_{n}, & \text { if reduction }=\text { 'mean', } \\
4429
+ \sum_{n=1}^{N} l_{n}, & \text { if reduction }=\text { 'sum' }
4430
+ \end{array}\right.
4431
+
4432
+ .. warning::
4433
+ This is an experimental API that is subject to change or deletion.
4297
4434
 
4298
4435
  Args:
4299
- input (Tensor): Predicted value, Tensor of any dimension.
4300
- target (Tensor): Target value, usually has the same shape as the `input`.
4301
- If `input` and `target` have different shape, make sure they can broadcast to each other.
4436
+ input (Tensor): :math:`(N)` or :math:`(N, C)` where `C = number of classes` , `N = batch size` ,
4437
+ or :math:`(N, C, d_1, d_2, ..., d_K)` (for high-dimensional data).
4438
+ `input` is expected to be log-probabilities.
4439
+ Data type only supports float32 or float16 or bfloat16(only supported by
4440
+ Atlas A2 training series products).
4441
+ target (Tensor): :math:`()` or :math:`(N)` ,
4442
+ where the value range is :math:`[0, C-1]`, or :math:`(N, d_1, d_2, ..., d_K)` for
4443
+ high-dimensional loss, data type must be int32 or int64 or uint8.
4444
+ weight (Tensor, optional): A rescaling weight applied to the loss of each class.
4445
+ If not None, the shape is :math:`(C,)`.
4446
+ The data type must be float16 or float32 or bfloat16(only supported by Atlas A2 training series products).
4447
+ It should have the same data type as `input` . Default: ``None`` .
4448
+ ignore_index (int, optional): Specifies a target value that is ignored
4449
+ and does not contribute to the input gradient. Default: ``-100`` .
4302
4450
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4303
4451
  ``'sum'`` . Default: ``'mean'`` .
4304
4452
 
4305
4453
  - ``'none'``: no reduction will be applied.
4306
- - ``'mean'``: compute and return the mean of elements in the output.
4454
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
4307
4455
  - ``'sum'``: the output elements will be summed.
4308
4456
 
4309
4457
  Returns:
4310
- Tensor or Scalar, if `reduction` is ``'none'``, return a Tensor with same shape and dtype as `input`.
4311
- Otherwise, a scalar value will be returned.
4312
-
4313
- Raises:
4314
- TypeError: If `input` is not a Tensor.
4315
- TypeError: If `target` is not a Tensor.
4316
- ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
4458
+ Tensor. The data type is the same as that of `input`.
4317
4459
 
4318
4460
  Supported Platforms:
4319
- ``Ascend`` ``GPU`` ``CPU``
4461
+ ``Ascend``
4320
4462
 
4321
4463
  Examples:
4322
- >>> from mindspore import Tensor, ops
4323
- >>> from mindspore import dtype as mstype
4324
- >>> x = Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
4325
- >>> target = Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
4326
- >>> output = ops.l1_loss(x, target, reduction="mean")
4327
- >>> print(output)
4328
- 3.0
4464
+ >>> import mindspore
4465
+ >>> import numpy as np
4466
+ >>> from mindspore import Tensor, mint
4467
+ >>> input = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
4468
+ >>> target = mindspore.Tensor(np.array([1, 0, 4]), mindspore.int32)
4469
+ >>> output = mint.nn.functional.nll_loss(input, target)
4470
+
4329
4471
  """
4330
- _check_is_tensor('input', input, "l1_loss")
4331
- _check_is_tensor('target', target, "l1_loss")
4332
- if reduction not in ('mean', 'sum', 'none'):
4333
- raise ValueError(f"For l1_loss, the 'reduction' must be in ['mean', 'sum', 'none'], but got {reduction}.")
4334
- loss = abs_(input - target)
4472
+ return _nllloss_nd(input, target, weight, ignore_index, reduction)
4473
+
4474
+
4475
+ def _nllloss_nd(input, target, weight=None, ignore_index=-100, reduction='mean'):
4476
+ """nllloss_nd inner function"""
4477
+ input_dim = input.ndim
4478
+ class_dim = 0 if input_dim == 1 else 1
4479
+ n_classes = input.shape[class_dim]
4480
+ if weight is None:
4481
+ weight = ones(n_classes, input.dtype)
4482
+ if input_dim < 1:
4483
+ raise ValueError(f"input dim should be less than 1, but got {input_dim}")
4484
+ if input_dim != 1 and input.shape[0] != target.shape[0]:
4485
+ raise ValueError(f"input bacth_size should be equal to target batch_size, but got {input.shape[0]} and "
4486
+ f"{target.shape[0]}")
4487
+ if input_dim == 1 or input_dim == 2:
4488
+ return nllloss_impl(input, target, weight, reduction, ignore_index)[0]
4489
+ if input_dim == 4:
4490
+ return nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
4491
+ # input_dim==3 or input_dim>4
4492
+ n = input.shape[0]
4493
+ c = input.shape[1]
4494
+ out_size = (n,) + input.shape[2:]
4495
+ if input.size > 0:
4496
+ input = input.view((n, c, 1, -1))
4497
+ else:
4498
+ input = input.view((n, c, 0, 0))
4499
+ if target.size > 0:
4500
+ target = target.view((n, 1, -1))
4501
+ else:
4502
+ target = target.view((n, 0, 0))
4503
+ if reduction != 'none':
4504
+ return nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
4505
+ ret = nllloss_2d_op(input, target, weight, reduction, ignore_index)[0]
4506
+ return ret.view(out_size)
4507
+
4508
+
4509
+ def _cross_entropy_for_probabilities(input, target, weight, reduction, label_smoothing, class_dim, n_classes):
4510
+ """cross_entropy inner function for class probabilities"""
4511
+ if input.shape != target.shape:
4512
+ raise ValueError("For cross_entropy that target is probabilities, input shape should equal to target shape.")
4513
+ if label_smoothing > 0.0:
4514
+ target = target * (1 - label_smoothing) + label_smoothing / n_classes
4515
+ loss = input * target
4516
+ if weight is not None:
4517
+ weight_ = weight
4518
+ ori_shape = loss.shape
4519
+ if input.ndim > 2:
4520
+ loss = loss.view(ori_shape[:2] + (-1,))
4521
+ weight_ = weight_.view(1, -1, 1)
4522
+ loss = loss * weight_
4523
+ loss = loss.view(ori_shape)
4524
+ if reduction == "mean":
4525
+ return -div(loss.sum(), (input.size / n_classes))
4526
+ if reduction == "sum":
4527
+ return -loss.sum()
4528
+ if reduction == "none":
4529
+ return -loss.sum(class_dim)
4530
+ raise ValueError(f"redution value {reduction} not valid.")
4531
+
4532
+
4533
+ def _cross_entropy_for_class_indices(input, target, weight, ignore_index, reduction, label_smoothing, class_dim,
4534
+ n_classes):
4535
+ """cross_entropy inner function for class indices"""
4536
+ nllloss = _nllloss_nd(input, target, weight, ignore_index, reduction)
4537
+ if label_smoothing > 0.0:
4538
+ if weight is not None:
4539
+ weight_ = weight
4540
+ input_ = input
4541
+ ori_shape = input.shape
4542
+ if input.ndim > 2:
4543
+ input_ = input.view(ori_shape[:2] + (-1,))
4544
+ weight_ = weight_.view(1, -1, 1)
4545
+ loss = input_ * weight_
4546
+ loss = loss.view(ori_shape)
4547
+ smooth_loss = -loss.sum(class_dim)
4548
+ else:
4549
+ smooth_loss = -input.sum(class_dim)
4550
+ ignore_mask = ops.eq(target, ignore_index)
4551
+ smooth_loss = masked_fill_op(smooth_loss, ignore_mask, 0)
4552
+ if reduction == "mean":
4553
+ true_mask = ~ignore_mask
4554
+ if weight is not None:
4555
+ weight_sum = gather_ext(weight, 0, flatten_ext(masked_select(target, true_mask))).sum()
4556
+ if weight_sum == 0:
4557
+ ret = smooth_loss.sum()
4558
+ else:
4559
+ ret = smooth_loss.sum() / weight_sum
4560
+ else:
4561
+ weight_sum = true_mask.sum()
4562
+ if weight_sum == 0:
4563
+ ret = smooth_loss.sum()
4564
+ else:
4565
+ ret = smooth_loss.sum() / weight_sum
4566
+ elif reduction == "sum":
4567
+ ret = smooth_loss.sum()
4568
+ elif reduction == "none":
4569
+ ret = smooth_loss
4570
+ else:
4571
+ raise ValueError(f"redution value {reduction} not valid.")
4572
+ return (1 - label_smoothing) * nllloss + ret * (label_smoothing / n_classes)
4573
+ return nllloss
4574
+
4575
+
4576
+ def cross_entropy_ext(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
4577
+ r"""
4578
+ The cross entropy loss between input and target.
4579
+
4580
+ The cross entropy supports two kinds of targets:
4581
+
4582
+ - Class indices (int) in the range :math:`[0, C)` where :math:`C` is the number of classes,
4583
+ the loss with reduction=none can be described as:
4584
+
4585
+ .. math::
4586
+
4587
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
4588
+ l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
4589
+ \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
4590
+
4591
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, :math:`N` is the batch size,
4592
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
4593
+
4594
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
4595
+
4596
+ .. math::
4597
+
4598
+ \ell(x, y) = \begin{cases}
4599
+ \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
4600
+ \text{if reduction} = \text{'mean',}\\
4601
+ \sum_{n=1}^N l_n, &
4602
+ \text{if reduction} = \text{'sum'.}
4603
+ \end{cases}
4604
+
4605
+ - Probabilities (float) for each class, useful when labels beyond a single class per minibatch item
4606
+ are required, the loss with reduction=none can be described as:
4607
+
4608
+ .. math::
4609
+
4610
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
4611
+ l_n = - \sum_{c=1}^C w_c \log \frac{\exp(x_{n,c})}{\sum_{i=1}^C \exp(x_{n,i})} y_{n,c}
4612
+
4613
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
4614
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
4615
+
4616
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
4617
+
4618
+ .. math::
4619
+
4620
+ \ell(x, y) = \begin{cases}
4621
+ \frac{\sum_{n=1}^N l_n}{N}, &
4622
+ \text{if reduction} = \text{'mean',}\\
4623
+ \sum_{n=1}^N l_n, &
4624
+ \text{if reduction} = \text{'sum'.}
4625
+ \end{cases}
4626
+
4627
+ .. warning::
4628
+ This is an experimental API that is subject to change or deletion.
4629
+
4630
+ Note:
4631
+ Dynamic shape, dynamic rank and variable constant input are not supported in `strict graph mode
4632
+ (jit_syntax_level=mindspore.STRICT)
4633
+ <https://www.mindspore.cn/tutorials/en/master/compile/static_graph.html>`_.
4634
+
4635
+ Args:
4636
+ input (Tensor): :math:`(N)` or :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
4637
+ in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
4638
+ `input` is expected to be unnormalized logits (log-softmax is applied internally), data type must be float16 or float32 or bfloat16
4639
+ (only supported by Atlas A2 training series products).
4640
+ target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
4641
+ :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32 or int64. For probabilities, tensor of shape
4642
+ :math:`(N,)` , :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32
4643
+ or bfloat16(only supported by Atlas A2 training series products).
4644
+ weight (Tensor, optional): A rescaling weight applied to the loss of each class.
4645
+ If not None, the shape is :math:`(C,)`, data type must be float16 or float32 or bfloat16(only supported by
4646
+ Atlas A2 training series products). Default: ``None`` .
4647
+ ignore_index (int, optional): Specifies a target value that is ignored and does not contribute to the input
4648
+ gradient. Only valid in class indices, please set it to a negative number in probabilities.
4649
+ Default: ``-100`` .
4650
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4651
+ ``'sum'`` . Default: ``'mean'`` .
4652
+
4653
+ - ``'none'``: no reduction will be applied.
4654
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
4655
+ - ``'sum'``: the output elements will be summed.
4656
+
4657
+ label_smoothing (float, optional): Label smoothing values, a regularization tool used to prevent the model
4658
+ from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default: ``0.0`` .
4659
+
4660
+ Returns:
4661
+ Tensor, the data type is the same as `input` .
4662
+
4663
+ Supported Platforms:
4664
+ ``Ascend``
4665
+
4666
+ Examples:
4667
+ >>> import mindspore as ms
4668
+ >>> from mindspore import ops, Tensor
4669
+ >>> import numpy as np
4670
+ >>> # Case 1: Indices labels
4671
+ >>> inputs = Tensor(np.random.randn(3, 5), ms.float32)
4672
+ >>> target = Tensor(np.array([1, 0, 4]), ms.int32)
4673
+ >>> output = ops.cross_entropy_ext(inputs, target)
4674
+ >>> # Case 2: Probability labels
4675
+ >>> inputs = Tensor(np.random.randn(3, 5), ms.float32)
4676
+ >>> target = Tensor(np.random.randn(3, 5), ms.float32)
4677
+ >>> output = ops.cross_entropy_ext(inputs, target)
4678
+ """
4679
+ if not isinstance(input, Tensor) or not isinstance(target, Tensor):
4680
+ raise TypeError(
4681
+ f"For cross_entropy, input and target must be Tensor, but got input:{type(input)}, target:{type(target)}.")
4682
+ if weight is not None and not isinstance(weight, Tensor):
4683
+ raise TypeError(f"For cross_entropy, weight must be Tensor or None, but got {type(weight)}.")
4684
+ if label_smoothing < 0.0 or label_smoothing > 1.0:
4685
+ raise ValueError(f"For cross_entropy, label_smoothing must in [0, 1]")
4686
+ if input.ndim == 0 or input.shape[0] == 0:
4687
+ raise ValueError(f"For cross_entropy, input don't support 0-dim and shape[0].")
4688
+ class_dim = 0 if input.ndim == 1 else 1
4689
+ n_classes = input.shape[class_dim]
4690
+ input = log_softmax_ext(input, class_dim, dtype=input.dtype)
4691
+ # for probabilities
4692
+ target_dtype = target.dtype
4693
+ if isinstance(target_dtype, type(mstype.tensor_type)):
4694
+ target_dtype = target_dtype.element_type()
4695
+ if target_dtype in mstype.float_type:
4696
+ return _cross_entropy_for_probabilities(input, target, weight, reduction, label_smoothing, class_dim,
4697
+ n_classes)
4698
+ # for class indices
4699
+ return _cross_entropy_for_class_indices(input, target, weight, ignore_index, reduction, label_smoothing,
4700
+ class_dim, n_classes)
4701
+
4702
+
4703
+ def l1_loss(input, target, reduction='mean'):
4704
+ r"""
4705
+ Calculate the mean absolute error between the `input` value and the `target` value.
4706
+
4707
+ Assuming that the :math:`x` and :math:`y` (predicted and target value) are 1-D Tensor,
4708
+ length :math:`N`, `reduction` is set to ``'none'``, then calculate the loss of
4709
+ :math:`x` and :math:`y` without dimensionality reduction.
4710
+
4711
+ The formula is as follows:
4712
+
4713
+ .. math::
4714
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with } l_n = \left| x_n - y_n \right|,
4715
+
4716
+ where :math:`N` is the batch size.
4717
+
4718
+ If `reduction` is set to ``'mean'`` or ``'sum'`` , then:
4719
+
4720
+ .. math::
4721
+ \ell(x, y) =
4722
+ \begin{cases}
4723
+ \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
4724
+ \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.}
4725
+ \end{cases}
4726
+
4727
+ Args:
4728
+ input (Tensor): Predicted value, Tensor of any dimension.
4729
+ target (Tensor): Target value, usually has the same shape as the `input`.
4730
+ If `input` and `target` have different shape, make sure they can broadcast to each other.
4731
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4732
+ ``'sum'`` . Default: ``'mean'`` .
4733
+
4734
+ - ``'none'``: no reduction will be applied.
4735
+ - ``'mean'``: compute and return the mean of elements in the output.
4736
+ - ``'sum'``: the output elements will be summed.
4737
+
4738
+ Returns:
4739
+ Tensor or Scalar, if `reduction` is ``'none'``, return a Tensor with same shape and dtype as `input`.
4740
+ Otherwise, a scalar value will be returned.
4741
+
4742
+ Raises:
4743
+ TypeError: If `input` is not a Tensor.
4744
+ TypeError: If `target` is not a Tensor.
4745
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
4746
+
4747
+ Supported Platforms:
4748
+ ``Ascend`` ``GPU`` ``CPU``
4749
+
4750
+ Examples:
4751
+ >>> from mindspore import Tensor, ops
4752
+ >>> from mindspore import dtype as mstype
4753
+ >>> x = Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
4754
+ >>> target = Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
4755
+ >>> output = ops.l1_loss(x, target, reduction="mean")
4756
+ >>> print(output)
4757
+ 3.0
4758
+ """
4759
+ _check_is_tensor('input', input, "l1_loss")
4760
+ _check_is_tensor('target', target, "l1_loss")
4761
+ if reduction not in ('mean', 'sum', 'none'):
4762
+ raise ValueError(f"For l1_loss, the 'reduction' must be in ['mean', 'sum', 'none'], but got {reduction}.")
4763
+ loss = abs_(input - target)
4335
4764
  return _get_loss(loss, reduction, "l1_loss")
4336
4765
 
4337
4766
 
@@ -4348,8 +4777,8 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
4348
4777
  .. math::
4349
4778
  L_{i} =
4350
4779
  \begin{cases}
4351
- \frac{0.5 (x_i - y_i)^{2}}{\beta}, & \text{if } |x_i - y_i| < \beta \\
4352
- |x_i - y_i| - 0.5 * \beta, & \text{otherwise. }
4780
+ \frac{0.5 (x_i - y_i)^{2}}{\text{beta}}, & \text{if } |x_i - y_i| < \text{beta} \\
4781
+ |x_i - y_i| - 0.5 * \text{beta}, & \text{otherwise. }
4353
4782
  \end{cases}
4354
4783
 
4355
4784
  If `reduction` is not `none`, then:
@@ -4364,12 +4793,26 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
4364
4793
  Here :math:`\text{beta}` controls the point where the loss function changes from quadratic to linear.
4365
4794
  :math:`\text{beta}>0` , its default value is ``1.0`` . :math:`N` is the batch size.
4366
4795
 
4796
+ .. warning::
4797
+ This API has poor performance on CPU and it is recommended to run it on the Ascend/GPU.
4798
+
4367
4799
  Args:
4368
- input (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4369
- Data type is float16, float32 or float64.
4370
- target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
4371
- beta (float): A parameter used to control the point where the function will change between
4372
- L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
4800
+ input (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means,
4801
+ any number of additional dimensions. Supported dtypes:
4802
+
4803
+ - Ascend: float16, float32, bfloat16.
4804
+ - CPU/GPU: float16, float32, float64.
4805
+ target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`.
4806
+
4807
+ - CPU/Ascend: has the same shape as the `input`, `target` and `input`
4808
+ comply with the implicit type conversion rules to make the data types consistent.
4809
+ - GPU: has the same shape and dtype as the `input`.
4810
+
4811
+ beta (number, optional): A parameter used to control the point where the function will change between
4812
+ L1 and L2 loss. Default: ``1.0`` .
4813
+
4814
+ - Ascend: The value should be equal to or greater than zero.
4815
+ - CPU/GPU: The value should be greater than zero.
4373
4816
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4374
4817
  ``'sum'`` . Default: ``'none'`` .
4375
4818
 
@@ -4379,14 +4822,15 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
4379
4822
 
4380
4823
  Returns:
4381
4824
  Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `input`.
4382
- Otherwise, the shape of output tensor is :math:`(1,)`.
4825
+ Otherwise, the shape of output tensor is :math:`()`.
4383
4826
 
4384
4827
  Raises:
4385
- TypeError: If `beta` is not a float.
4386
- ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4387
- TypeError: If dtype of `input` or `target` is not one of float16, float32, float64.
4388
- ValueError: If `beta` is less than or equal to 0.
4828
+ TypeError: If `input` or `target` is not a Tensor.
4829
+ RuntimeError: If dtype of `input` or `target` is not one of float16, float32, float64, bfloat16.
4389
4830
  ValueError: If shape of `input` is not the same as `target`.
4831
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4832
+ TypeError: If `beta` is not a float, int or bool.
4833
+ RuntimeError: If `beta` is less than or equal to 0.
4390
4834
 
4391
4835
  Supported Platforms:
4392
4836
  ``Ascend`` ``GPU`` ``CPU``
@@ -4476,8 +4920,8 @@ def leaky_relu(input, alpha=0.2):
4476
4920
 
4477
4921
  Args:
4478
4922
  input (Tensor): The input of leaky_relu is a Tensor of any dimension.
4479
- alpha (Union[int, float]): Slope of the activation function when the element of `input` is less than 0.
4480
- Default: ``0.2`` .
4923
+ alpha (Union[int, float], optional): Slope of the activation function when
4924
+ the element of `input` is less than 0. Default: ``0.2`` .
4481
4925
 
4482
4926
  Returns:
4483
4927
  Tensor, has the same type and shape as the `input`.
@@ -4509,37 +4953,25 @@ def leaky_relu(input, alpha=0.2):
4509
4953
 
4510
4954
  def intopk(x1, x2, k):
4511
4955
  r"""
4512
- Determines whether the targets are in the top `k` predictions.
4956
+ Return whether the elements in second input tensor exist among the top `k` elements of the first input tensor.
4513
4957
 
4514
4958
  Args:
4515
- x1 (Tensor): A 2D Tensor defines the predictions of a batch of samples with float16 or float32
4516
- data type.
4517
- x2 (Tensor): A 1D Tensor defines the labels of a batch of samples with int32 data type. The size of `x2`
4518
- must be equal to the first dimension of `x1`. The values of `x2` can not be negative and
4519
- must be equal to or less than index of x1's second dimension.
4520
- k (int): Specifies the number of top elements to be used for computing precision along the last dimension.
4959
+ x1 (Tensor): The 2-D input tensor.
4960
+ x2 (Tensor): The 1-D input tensor, should satisfy :math:`x2.shape[0] = x1.shape[0]` .
4961
+ k (int): Top `k` elements.
4521
4962
 
4522
4963
  Returns:
4523
- Tensor has 1 dimension of type bool and the same shape with `x2`. For labeling sample `i` in `x2`,
4524
- if the label in the first `k` predictions for sample `i` is in `x1`, then the value is True, otherwise False.
4525
-
4526
- Raises:
4527
- TypeError: If `k` is not an int.
4528
- TypeError: If `x1` or `x2` is not a Tensor.
4529
- TypeError: If dtype of `x1` is neither float16 nor float32.
4964
+ A 1-D tensor whose data type is bool, has the same shape with `x2`.
4530
4965
 
4531
4966
  Supported Platforms:
4532
4967
  ``Ascend`` ``GPU`` ``CPU``
4533
4968
 
4534
4969
  Examples:
4535
4970
  >>> import mindspore
4536
- >>> import numpy as np
4537
- >>> from mindspore import Tensor, ops
4538
- >>> x1 = Tensor(np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]]), mindspore.float32)
4539
- >>> x2 = Tensor(np.array([1, 3]), mindspore.int32)
4540
- >>> output = ops.intopk(x1, x2, 3)
4541
- >>> print(output)
4542
- [ True False]
4971
+ >>> x1 = mindspore.tensor([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]], mindspore.float32)
4972
+ >>> x2 = mindspore.tensor([1, 3], mindspore.int32)
4973
+ >>> mindspore.ops.intopk(x1, x2, 3)
4974
+ Tensor(shape=[2], dtype=Bool, value= [ True, False])
4543
4975
  """
4544
4976
  _in_topk = _get_cache_prim(P.InTopK)(k)
4545
4977
  return _in_topk(x1, x2)
@@ -4961,7 +5393,7 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4961
5393
  H_{in}, W_{in})` (5-D case) and dtype of float32 or float64.
4962
5394
  grid (Tensor): flow-field with shape of :math:`(N, H_{out}, W_{out}, 2)` (4-D case) or :math:`(N, D_{out},
4963
5395
  H_{out}, W_{out}, 3)` (5-D case) and same dtype as `input`.
4964
- mode (str): An optional string specifying the interpolation method. The optional values are
5396
+ mode (str, optional): An optional string specifying the interpolation method. The optional values are
4965
5397
  ``'bilinear'``, ``'nearest'``. Default: ``'bilinear'`` . Note: `bicubic` is not supported yet. When
4966
5398
  `mode="bilinear"` and the input is 5-D, the interpolation mode used internally will actually
4967
5399
  be trilinear. However, when the input is 4-D, the interpolation mode will legitimately be bilinear.
@@ -4976,9 +5408,10 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4976
5408
  It performs bilinear interpolation in the two spatial dimensions and linear interpolation along
4977
5409
  the third dimension. It is commonly used for volume or 3D image interpolation.
4978
5410
 
4979
- padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
5411
+ padding_mode (str, optional): An optional string specifying the pad method.
5412
+ The optional values are "zeros", "border" or
4980
5413
  "reflection". Default: ``'zeros'`` .
4981
- align_corners (bool): If set to `True`, the extrema (-1 and 1) are considered as referring to
5414
+ align_corners (bool, optional): If set to `True`, the extrema (-1 and 1) are considered as referring to
4982
5415
  the center points of the input's corner pixels. If set to `False`, they are instead considered as referring
4983
5416
  to the corner points of the input's corner pixels, making the sampling more resolution agnostic. Default:
4984
5417
  ``False`` .
@@ -5389,12 +5822,15 @@ def conv3d_transpose(inputs, weight, pad_mode='valid', padding=0, stride=1, dila
5389
5822
  Args:
5390
5823
  inputs (Tensor): The gradients with respect to the output of the convolution.
5391
5824
  The shape conforms to the default.
5392
- data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`. Currently dout data type only supports float16
5393
- and float32.
5825
+ data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`.
5826
+ Supported dtypes:
5827
+
5828
+ - Ascend: float16.
5829
+ - GPU/CPU: float16, float32.
5394
5830
  weight (Tensor): Set size of kernel is :math:`(K_d, K_h, K_w)`, then the shape is
5395
5831
  :math:`(C_{in}, C_{out}//group, K_d, K_h, K_w)`. Where :math:`group` is the Args parameter,
5396
5832
  :math:`//` is the symbol for integer division.
5397
- Currently weight data type only supports float16 and float32.
5833
+ It has the same dtype as `dout`.
5398
5834
  pad_mode (str): Specifies padding mode. The optional values are
5399
5835
  "same", "valid", "pad". Default: "valid".
5400
5836
 
@@ -5538,9 +5974,9 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5538
5974
 
5539
5975
  The shape of the convolutional kernel is given by :math:`(\text{kernel_size})`,
5540
5976
  where :math:`\text{kernel_size}` is the width of the kernel.
5541
- If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5542
- will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
5543
- where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5977
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
5978
+ will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
5979
+ where `groups` is the number of groups dividing `x`'s input channel when applying group convolution.
5544
5980
 
5545
5981
  For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5546
5982
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
@@ -5655,7 +6091,7 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5655
6091
 
5656
6092
  def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5657
6093
  r"""
5658
- Applies a 2D convolution over an input tensor. The input tenor is typically of
6094
+ Applies a 2D convolution over an input tensor. The input tensor is typically of
5659
6095
  shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
5660
6096
  channel number, :math:`H` is feature height, :math:`W` is feature width.
5661
6097
 
@@ -5690,9 +6126,9 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5690
6126
  The shape of the convolutional kernel is given by :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
5691
6127
  where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the kernel,
5692
6128
  respectively.
5693
- If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5694
- will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5695
- where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
6129
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
6130
+ will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
6131
+ where `groups` is the number of groups dividing `x`'s input channel when applying group convolution.
5696
6132
 
5697
6133
  For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5698
6134
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
@@ -5792,6 +6228,127 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5792
6228
  return output
5793
6229
 
5794
6230
 
6231
+ def conv1d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
6232
+ r"""
6233
+ Applies a 1D convolution over an input tensor. The input tensor is typically
6234
+ of shape :math:`(N, C_{in}, L_{in})`,
6235
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is sequence length.
6236
+
6237
+ The output is calculated based on formula:
6238
+
6239
+ .. math::
6240
+
6241
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
6242
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
6243
+
6244
+ where :math:`bias` is the output channel bias, :math:`ccor` is
6245
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
6246
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
6247
+
6248
+ - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
6249
+ where :math:`N` is the batch size of the input.
6250
+
6251
+ - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
6252
+ where :math:`C_{out}` is the number of
6253
+ output channels, which is also equal to the number of kernels.
6254
+
6255
+ - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
6256
+ where :math:`C_{in}` is the number of
6257
+ input channels, which is also equal to the number of channels in the convolutional kernels.
6258
+
6259
+ Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
6260
+ output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
6261
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
6262
+ channel in the :math:`i`-th batch of the input feature map.
6263
+
6264
+ The shape of the convolutional kernel is given by :math:`(\text{kernel_size})`,
6265
+ where :math:`\text{kernel_size}` is the length of the kernel.
6266
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
6267
+ will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
6268
+ where `groups` is the number of groups dividing `input`'s input channel when applying group convolution.
6269
+
6270
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
6271
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
6272
+
6273
+ .. warning::
6274
+ This is an experimental API that is subject to change or deletion.
6275
+
6276
+ Args:
6277
+ input (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})` or :math:`(C_{in}, L_{in})`.
6278
+ weight (Tensor): Tensor of shape
6279
+ :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`, then the size of kernel
6280
+ is :math:`(\text{kernel_size})`.
6281
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
6282
+ When bias is ``None`` , zeros will be used. Default: ``None`` .
6283
+ stride (Union[int, tuple[int], list[int]], optional): The movement stride of the 1D convolution kernel.
6284
+ The data type is an integer or a tuple of one integer. Default: ``1`` .
6285
+ padding (Union[int, tuple[int], list[int], str], optional): The number of padding
6286
+ on the input.
6287
+ The data type is an integer or a tuple of one integer or string {`valid`, `same`}.
6288
+ The value should be greater than or equal to 0. Default: ``0`` .
6289
+
6290
+ - ``"same"``: Pad the input around its edges so that the shape of input and output
6291
+ are the same when `stride` is set to ``1``.
6292
+ The amount of padding is calculated by the operator internally. If the amount is even, it is
6293
+ uniformly distributed around the input, if it is odd, the excess amount goes to the right side.
6294
+ If this mode is set, `stride` must be 1.
6295
+
6296
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
6297
+ possible length. Extra sequence that could not complete a full stride will
6298
+ be discarded.
6299
+
6300
+ dilation (Union[int, tuple[int], list[int]], optional): Specifies the dilation rate to use for
6301
+ dilated convolution. It can be a single int or a tuple of 1 integer.
6302
+ Assuming :math:`dilation=(d)`, the convolutional kernel samples the input with a
6303
+ spacing of :math:`d-1` elements in the length direction.
6304
+ Default: ``1`` .
6305
+ groups (int, optional): Splits filter into groups, `in_channels` and `out_channels` must be
6306
+ divisible by `groups`. If the groups is equal to `in_channels` and `out_channels`,
6307
+ this 1D convolution layer also can be called 1D depthwise convolution layer. Default: ``1`` .
6308
+
6309
+ - :math:`(C_{in} \text{ % } \text{groups} == 0)` , :math:`(C_{out} \text{ % } \text{groups} == 0)` ,
6310
+ :math:`(C_{out} >= \text{groups})` , :math:`(\text{kernel_size[1]} = C_{in} / \text{groups})` .
6311
+
6312
+ Returns:
6313
+ Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
6314
+ To see how different pad modes affect the output shape, please refer to
6315
+ :class:`mindspore.mint.nn.Conv1d` for more details.
6316
+
6317
+ Raises:
6318
+ ValueError: Args and size of the input feature map should satisfy the output formula to ensure that the size of
6319
+ the output feature map is positive; otherwise, an error will be reported.
6320
+ RuntimeError: On Ascend, due to the limitation of the L1 cache size of different NPU chip, if input size or
6321
+ kernel size is too large, it may trigger an error.
6322
+ TypeError: If `in_channels`, `out_channels` or `groups` is not an int.
6323
+ TypeError: If `kernel_size`, `stride` or `dilation` is neither an int nor a tuple.
6324
+ ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
6325
+ ValueError: If `padding` is less than 0.
6326
+ ValueError: If `padding` is `same` , `stride` is not equal to 1.
6327
+ ValueError: The input parameters do not satisfy the convolution output formula.
6328
+ ValueError: The KernelSize cannot exceed the size of the input feature map.
6329
+ ValueError: The value of padding cannot cause the calculation area to exceed the input size.
6330
+
6331
+ Supported Platforms:
6332
+ ``Ascend``
6333
+
6334
+ Examples:
6335
+ >>> import mindspore
6336
+ >>> import numpy as np
6337
+ >>> from mindspore import Tensor, ops, mint
6338
+ >>> x = Tensor(np.ones([10, 32, 32]), mindspore.float32)
6339
+ >>> weight = Tensor(np.ones([32, 32, 3]), mindspore.float32)
6340
+ >>> output = mint.nn.functional.conv1d(x, weight)
6341
+ >>> print(output.shape)
6342
+ (10, 32, 30)
6343
+ """
6344
+ if isinstance(padding, (int, tuple, list)):
6345
+ return conv1d_ext_op(input, weight, bias, stride, padding, dilation, groups)
6346
+ if isinstance(padding, str):
6347
+ return conv1d_padding_op(input, weight, bias, stride, padding, dilation, groups)
6348
+ raise TypeError(f"For conv1d, the parameter 'padding' must be a tuple/list " \
6349
+ f"or a string, but got {type(padding)}")
6350
+
6351
+
5795
6352
  def _check_stride_when_same_mode(stride):
5796
6353
  """ stride must be 1 when pad mode is same """
5797
6354
  if isinstance(stride, int):
@@ -5840,9 +6397,9 @@ def _get_pad_nd_info(pad_l, pad_r):
5840
6397
 
5841
6398
  def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
5842
6399
  r"""
5843
- Applies a 2D convolution over an input tensor. The input tenor is typically of
5844
- shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
5845
- channel number, :math:`H` is feature height, :math:`W` is feature width.
6400
+ Applies a 2D convolution over an input tensor. The input tensor is typically of
6401
+ shape :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(C_{in}, H_{in}, W_{in})`,
6402
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is feature height, :math:`W` is feature width.
5846
6403
 
5847
6404
  The output is calculated based on formula:
5848
6405
 
@@ -5855,8 +6412,6 @@ def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
5855
6412
  the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5856
6413
  :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5857
6414
 
5858
- Here are the indices' meanings:
5859
-
5860
6415
  - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
5861
6416
  where :math:`N` is the batch size of the input.
5862
6417
 
@@ -5883,55 +6438,67 @@ def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
5883
6438
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
5884
6439
  `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.
5885
6440
 
5886
- Note:
5887
- On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
5888
- That is, when `groups>1`, condition :math:`C_{in}` = :math:`C_{out}` = `groups` must be satisfied.
6441
+ .. warning::
6442
+ This is an experimental API that is subject to change or deletion.
5889
6443
 
5890
6444
  Args:
5891
- input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
6445
+ input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` or :math:`(C_{in}, H_{in}, W_{in})`.
5892
6446
  weight (Tensor): Tensor of shape
5893
6447
  :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
5894
6448
  is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
5895
6449
  bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5896
6450
  When bias is ``None`` , zeros will be used. Default: ``None`` .
5897
- stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
5898
- the height and width of movement are both strides, or a tuple of two int numbers that
6451
+ stride (Union(int, tuple[int], list[int]), optional): The distance of kernel moving, an int number that
6452
+ represents the height and width of movement are both strides, or a tuple of two int numbers that
5899
6453
  represent height and width of movement respectively. Default: ``1`` .
5900
- padding (Union(int, tuple[int], list[int], str), optional): Implicit paddings on both sides of the input `x`.
5901
- Can be a string, one integer or a tuple/list with 2 integers.
5902
- If `padding` is a string, the optional values are ``"same"`` , ``"valid"``.
5903
-
5904
- - same: Adopts the way of completion. The height and width of the output will be equal to
5905
- the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
5906
- left and right possiblily. Otherwise, the last extra padding will be calculated from the bottom
5907
- and the right side. If this mode is set, `padding` must be 0.
5908
-
5909
- - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
5910
- without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.
5911
-
5912
- If `padding` is one integer, the paddings of top, bottom, left and right are the same, equal to padding.
5913
- If `padding` is a tuple/list with 2 integers, the padding of top adn bottom is padding[0],
5914
- and the padding of left and right is padding[1]. Default: ``0`` .
5915
- dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of
5916
- 2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
6454
+ padding (Union[int, tuple[int], list[int], str], optional): The number of padding
6455
+ on the height and width directions of the input.
6456
+ The data type is an integer or a tuple of two integers or string {`valid`, `same`}. If `padding` is an
6457
+ integer, then `padding_{H}` and `padding_{W}` are all equal to `padding`.
6458
+ If `padding` is a tuple of 2 integers, then `padding_{H}` and `padding_{W}`
6459
+ is equal to `padding[0]` and `padding[1]` respectively.
6460
+ The value should be greater than or equal to 0. Default: ``0`` .
6461
+
6462
+ - ``"same"``: Pad the input around its edges so that the shape of input and output
6463
+ are the same when `stride` is set to ``1``.
6464
+ The amount of padding is calculated by the operator internally. If the amount is even, it is
6465
+ uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side.
6466
+ If this mode is set, `stride` must be 1.
6467
+
6468
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
6469
+ possible height and width. Extra pixels that could not complete a full stride will
6470
+ be discarded.
6471
+
6472
+ dilation (Union(int, tuple[int], list[int]), optional): Gaps between kernel elements. The data type
6473
+ is int or a tuple of 2 integers. Specifies the dilation rate to use for dilated convolution.
6474
+ If set to be :math:`k > 1`,
5917
6475
  there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
5918
6476
  be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
5919
6477
  groups (int, optional): Splits `input` into groups. Default: ``1`` .
5920
6478
 
6479
+ - :math:`(C_{in} \text{ % } \text{groups} == 0)` , :math:`(C_{out} \text{ % } \text{groups} == 0)` ,
6480
+ :math:`(C_{out} >= \text{groups})` , :math:`(\text{kernel_size[1]} = C_{in} / \text{groups})`
6481
+
5921
6482
  Returns:
5922
6483
  Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
5923
6484
  To see how different pad modes affect the output shape, please refer to
5924
- :class:`mindspore.nn.Conv2d` for more details.
5925
-
6485
+ :class:`mindspore.mint.nn.Conv2d` for more details.
5926
6486
 
5927
6487
  Raises:
5928
- TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
5929
- TypeError: `groups` is not an int.
6488
+ ValueError: Args and size of the input feature map should satisfy the output formula to ensure that the size of
6489
+ the output feature map is positive; otherwise, an error will be reported. For more details on the output
6490
+ formula, please refer to :class:`mindspore.mint.nn.Conv2d`.
6491
+ RuntimeError: On Ascend, due to the limitation of the L1 cache size of different NPU chip, if input size or
6492
+ kernel size is too large, it may trigger an error.
6493
+ TypeError: If `in_channels` , `out_channels` or `groups` is not an int.
6494
+ TypeError: If `kernel_size` , `stride` or `dilation` is neither an int nor a tuple.
5930
6495
  TypeError: If `bias` is not a Tensor.
5931
6496
  ValueError: If the shape of `bias` is not :math:`(C_{out})` .
5932
6497
  ValueError: If `stride` or `dilation` is less than 1.
5933
- ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
5934
- ValueError: If `padding` is a tuple/list whose length is not equal to 2.
6498
+ ValueError: If `padding` is `same` , `stride` is not equal to 1.
6499
+ ValueError: The input parameters do not satisfy the convolution output formula.
6500
+ ValueError: The KernelSize cannot exceed the size of the input feature map.
6501
+ ValueError: The value of padding cannot cause the calculation area to exceed the input size.
5935
6502
 
5936
6503
  Supported Platforms:
5937
6504
  ``Ascend``
@@ -5939,123 +6506,21 @@ def conv2d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups
5939
6506
  Examples:
5940
6507
  >>> import mindspore
5941
6508
  >>> import numpy as np
5942
- >>> from mindspore import Tensor, ops
5943
- >>> from mindspore.ops.function.nn_func import conv2d_ext
6509
+ >>> from mindspore import Tensor, ops, mint
5944
6510
  >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
5945
6511
  >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
5946
- >>> output = conv2d_ext(x, weight)
6512
+ >>> output = mint.nn.functional.conv2d(x, weight)
5947
6513
  >>> print(output.shape)
5948
6514
  (10, 32, 30, 30)
5949
6515
  """
5950
-
5951
- def _convolution_same(input, weight, bias, dilation, groups):
5952
- """ convolution when mode is 'same' """
5953
- if isinstance(dilation, int):
5954
- dilation = (dilation,) * 2
5955
- validator.check_int(len(weight.shape), 4, validator.EQ, "weight.shape", 'conv2d')
5956
- validator.check_int(len(dilation), 2, validator.EQ, "dilation", 'conv2d')
5957
-
5958
- # Calc padding info
5959
- need_pad_nd, pad_l, pad_r = _get_pad_info(dilation, weight)
5960
- if not need_pad_nd:
5961
- conv = _get_cache_prim(Convolution)(stride, pad_l, dilation, False, (0, 0), groups)
5962
- return conv(input, weight, bias)
5963
-
5964
- # Calc pad nd info
5965
- pad_nd, pad_l = _get_pad_nd_info(pad_l, pad_r)
5966
- pad_nd_op = _get_cache_prim(ConstantPadND)()
5967
- padded_input = pad_nd_op(input, pad_nd, 0)
5968
- conv = _get_cache_prim(Convolution)(stride, pad_l, dilation, False, (0, 0), groups)
5969
- return conv(padded_input, weight, bias)
5970
-
5971
- if isinstance(padding, int):
5972
- padding = (padding,) * 2
5973
-
5974
- if isinstance(padding, (tuple, list)):
5975
- conv = _get_cache_prim(Convolution)(stride, padding, dilation, False, (0, 0), groups)
5976
- return conv(input, weight, bias)
6516
+ if isinstance(padding, (int, tuple, list)):
6517
+ return conv2d_ext_op(input, weight, bias, stride, padding, dilation, groups)
5977
6518
  if isinstance(padding, str):
5978
- if padding == 'valid':
5979
- conv = _get_cache_prim(Convolution)(stride, (0, 0), dilation, False, (0, 0), groups)
5980
- return conv(input, weight, bias)
5981
- if padding == 'same':
5982
- _check_stride_when_same_mode(stride)
5983
- return _convolution_same(input, weight, bias, dilation, groups)
5984
- raise ValueError(f"For conv2d, the parameter 'padding' must be 'same' or 'valid' when " \
5985
- f"the type of 'padding' is string.")
6519
+ return conv2d_padding_op(input, weight, bias, stride, padding, dilation, groups)
5986
6520
  raise TypeError(f"For conv2d, the parameter 'padding' must be a tuple/list " \
5987
6521
  f"or a string, but got {type(padding)}")
5988
6522
 
5989
6523
 
5990
- def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
5991
- r"""
5992
- Calculates a 2D transposed convolution, which can be regarded as Conv2d for the gradient of the input,
5993
- also called deconvolution (although it is not an actual deconvolution).
5994
-
5995
- The input is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
5996
- where :math:`N` is batch size, :math:`C_{in}` is space dimension,
5997
- :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively.
5998
-
5999
- When Conv2d and Conv2dTranspose are initialized with the same parameters, and `pad_mode` is set to 'pad',
6000
- :math:`dilation * (kernel\_size - 1) - padding` amount of zero will be paded to the height and width
6001
- directions of the input, they are inverses of each other in regard to the input and output shapes in this case.
6002
- However, when `stride` > 1, Conv2d maps multiple input shapes to the same output shape. Deconvolutional network
6003
- can refer to `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_.
6004
-
6005
- Args:
6006
- input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
6007
- weight (Tensor): Tensor of shape
6008
- :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
6009
- is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
6010
- bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
6011
- When bias is ``None`` , zeros will be used. Default: ``None`` .
6012
- stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
6013
- the height and width of movement are both strides, or a tuple of two int numbers that
6014
- represent height and width of movement respectively. Default: ``1`` .
6015
- padding (Union(int, tuple[int], list[int]), optional): Implicit paddings on both sides of the input `x`.
6016
- Can be an integer or a tuple/list with 2 integers.
6017
- output_padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the output.
6018
- The data type is an integer or a tuple of two integers. If `output_padding` is an integer,
6019
- then the bottom and right padding are all equal to `output_padding`. If `output_padding` is a tuple of
6020
- 2 integers, then the bottom and right padding is equal to `output_padding[0]`, `output_padding[1]`
6021
- respectively.
6022
- groups (int, optional): Splits `input` into groups. Default: ``1`` .
6023
- dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of
6024
- 2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
6025
- there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
6026
- be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
6027
-
6028
- Returns:
6029
- Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
6030
- To see how different pad modes affect the output shape, please refer to
6031
- :class:`mindspore.nn.Conv2dTranspose` for more details.
6032
-
6033
-
6034
- Raises:
6035
- TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
6036
- TypeError: `groups` is not an int.
6037
- TypeError: If `bias` is not a Tensor.
6038
- ValueError: If the shape of `bias` is not :math:`(C_{out})` .
6039
- ValueError: If `stride` or `dilation` is less than 1.
6040
- ValueError: If `padding` is a tuple/list whose length is not equal to 2.
6041
-
6042
- Supported Platforms:
6043
- ``Ascend``
6044
-
6045
- Examples:
6046
- >>> import mindspore
6047
- >>> import numpy as np
6048
- >>> from mindspore import Tensor, ops
6049
- >>> x = Tensor(np.ones([1, 6, 32, 32]), mindspore.float32)
6050
- >>> weight = Tensor(np.ones([6, 3, 5, 5]), mindspore.float32)
6051
- >>> output = ops.conv_transpose2d(x, weight)
6052
- >>> print(output.shape)
6053
- (1, 3, 36, 36)
6054
- """
6055
- conv = _get_cache_prim(Convolution)(stride, padding, dilation, True, output_padding, groups)
6056
- return conv(input, weight, bias)
6057
-
6058
-
6059
6524
  def hardtanh(input, min_val=-1.0, max_val=1.0):
6060
6525
  r"""
6061
6526
  Applies the hardtanh activation function element-wise. The activation function is defined as:
@@ -6742,10 +7207,10 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
6742
7207
  :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
6743
7208
  where :math:`\text{kernel_size[0]}` , :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are the depth,
6744
7209
  height and width of the kernel, respectively.
6745
- If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
6746
- will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
7210
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
7211
+ will be :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]},
6747
7212
  \text{kernel_size[1]}, \text{kernel_size[2]})`,
6748
- where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
7213
+ where `groups` is the number of groups dividing `x`'s input channel when applying group convolution.
6749
7214
 
6750
7215
  For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
6751
7216
  <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
@@ -6795,91 +7260,202 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
6795
7260
 
6796
7261
  Returns:
6797
7262
  Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
7263
+ To see how different pad modes affect the output shape, please refer to
7264
+ :class:`mindspore.nn.Conv3d` for more details.
6798
7265
 
6799
- `pad_mode` is ``"same"``:
7266
+ Raises:
7267
+ TypeError: If `out_channel` or `groups` is not an int.
7268
+ TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
7269
+ TypeError: If `bias` is not a Tensor.
7270
+ ValueError: If the shape of `bias` is not :math:`(C_{out})`.
7271
+ ValueError: If `stride` or `dilation` is less than 1.
7272
+ ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
7273
+ ValueError: If `padding` is a tuple or list whose length is not equal to 3.
7274
+ ValueError: If `pad_mode` is not equal to 'pad' and `padding` is greater than 0.
6800
7275
 
6801
- .. math::
6802
- \begin{array}{ll} \\
6803
- D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
6804
- H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
6805
- W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
6806
- \end{array}
7276
+ Supported Platforms:
7277
+ ``Ascend`` ``GPU``
7278
+
7279
+ Examples:
7280
+ >>> import mindspore
7281
+ >>> import numpy as np
7282
+ >>> from mindspore import Tensor, ops
7283
+ >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
7284
+ >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
7285
+ >>> output = ops.conv3d(x, weight, pad_mode="same", padding=0, stride=1, dilation=1, groups=1)
7286
+ >>> print(output.shape)
7287
+ (16, 32, 10, 32, 32)
7288
+ >>> output = ops.conv3d(x, weight, pad_mode="valid", padding=0, stride=1, dilation=1, groups=1)
7289
+ >>> print(output.shape)
7290
+ (16, 32, 7, 30, 30)
7291
+ >>> output = ops.conv3d(x, weight, pad_mode="pad", padding=(2, 1, 1), stride=1, dilation=1, groups=1)
7292
+ >>> print(output.shape)
7293
+ (16, 32, 11, 32, 32)
7294
+ """
7295
+ weight_shape = weight.shape
7296
+ out_channel = weight_shape[0]
7297
+ kernel_size = weight_shape[2:5]
7298
+ if isinstance(stride, (tuple, list)):
7299
+ _check_conv_iterable_lengths(stride, dim=3, iter_name='stride')
7300
+ if isinstance(dilation, (tuple, list)):
7301
+ _check_conv_iterable_lengths(dilation, dim=3, iter_name='dilation')
7302
+ input_shape = input.shape
7303
+ in_channel = input_shape[1]
7304
+ if not (in_channel % groups == 0 and out_channel % groups == 0):
7305
+ raise ValueError("The argument 'groups' should be divisible by 'in_channel' " \
7306
+ "and 'out_channel'")
7307
+ if isinstance(padding, (list, tuple)):
7308
+ padding = _manipulate_padding(padding, dim=3)
7309
+ conv = _get_cache_prim(P.Conv3D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCDHW")
7310
+ if bias is None:
7311
+ return conv(input, weight)
7312
+ if not isinstance(bias, Tensor):
7313
+ raise TypeError(f"For 'conv3d', the 'bias' must be a Tensor, but got {type(bias)}.")
7314
+ conv_result = conv(input, weight)
7315
+ output = bias_add(conv_result, bias)
7316
+ return output
7317
+
7318
+
7319
+ def conv3d_ext(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1):
7320
+ r"""
7321
+ Applies a 3D convolution over an input tensor. The input tensor is typically of
7322
+ shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` or :math:`(C_{in}, D_{in}, H_{in}, W_{in})`,
7323
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`D, H, W` are the depth,
7324
+ height and width of the feature graph, respectively.
7325
+
7326
+ The output is calculated based on formula:
7327
+
7328
+ .. math::
7329
+
7330
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
7331
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
7332
+
7333
+ where :math:`bias` is the output channel bias, :math:`ccor` is
7334
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_
7335
+ , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
7336
+
7337
+ Here are the indices' meanings:
7338
+
7339
+ - :math:`i` corresponds to the batch number, the range is :math:`[0, N-1]`,
7340
+ where :math:`N` is the batch size of the input.
7341
+
7342
+ - :math:`j` corresponds to the output channel, the range is :math:`[0, C_{out}-1]`,
7343
+ where :math:`C_{out}` is the number of
7344
+ output channels, which is also equal to the number of kernels.
7345
+
7346
+ - :math:`k` corresponds to the input channel, the range is :math:`[0, C_{in}-1]`,
7347
+ where :math:`C_{in}` is the number of
7348
+ input channels, which is also equal to the number of channels in the convolutional kernels.
7349
+
7350
+ Therefore, in the above formula, :math:`{bias}(C_{\text{out}_j})` represents the bias of the :math:`j`-th
7351
+ output channel, :math:`{weight}(C_{\text{out}_j}, k)` represents the slice of the :math:`j`-th convolutional
7352
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
7353
+ channel in the :math:`i`-th batch of the input feature map.
7354
+
7355
+ The shape of the convolutional kernel is given by :math:`(kd, kh, kw)` where :math:`kd` , :math:`kh` and\
7356
+ :math:`kw` are the depth, height and width of the kernel, respectively.
7357
+ If we consider the input and output channels as well as the `groups` parameter, the complete kernel shape
7358
+ will be :math:`(C_{out}, C_{in} / \text{groups}, kd, kh, kw)`,
7359
+ where `groups` is the number of groups into which the channels of `input` are divided when applying group convolution.
7360
+
7361
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
7362
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
7363
+
7364
+ The following lists some of the limitations of the parameters.
7365
+
7366
+ - input -- The input to the conv3d. The input must have each dimension size within the range [1, int32_max].
7367
+ - weight -- Filters of shape :math:`(C_{out}, C_{in} / groups, kd, kh, kw)`. The value of :math:`kh`
7368
+ and :math:`kw` is in the range [1, 511]. The remaining values are in the range [1, int32_max].
7369
+ And :math:`kh*kw*k0` is less than 65536 (k0 is 16. If data type is float32, k0 is 8).
7370
+ - bias -- Bias Tensor with shape :math:`(C_{out})`. The shape must equal the first dimension of the weight.
7371
+ - stride -- The distance of kernel moving. It can be an int number or
7372
+ tuple (noted by :math:`(stride_d, stride_h, stride_w)`). stride_h and stride_w are in the range [1, 63].
7373
+ stride_d is in the range [1, 255].
7374
+ - padding -- If padding is an int number, it is in the range [0, 255].
7375
+ - dilation -- The value is in the range [1, 255].
7376
+ - groups -- The value is in the range [1, 65535].
7377
+ - :math:`C_{in} \% \text{groups} == 0 \quad \text{and} \quad C_{out} \% \text{groups} == 0` .
7378
+ - :math:`weight[1] == C_{in} / groups` .
7379
+ - :math:`H_{in} + PadUp + PadDown >= (kh - 1) * DilationH + 1` .
7380
+ - :math:`W_{in} + PadLeft + PadRight >= (kw - 1) * DilationW + 1` .
7381
+ - :math:`D_{in} + PadFront + PadBack >= (kd - 1) * DilationD + 1` .
7382
+ - :math:`H_{out} = (H_{in} + PadUp + PadDown - ((kh - 1) * DilationH + 1)) / StrideH + 1` .
7383
+ - :math:`W_{out} = (W_{in} + PadLeft + PadRight - ((kw - 1) * DilationW + 1)) / StrideW + 1` .
7384
+ - :math:`D_{out} = (D_{in} + PadFront + PadBack - ((kd - 1) * DilationD + 1)) / StrideD + 1` .
7385
+ - :math:`(D_{in}+PadFront+PadBack - ((kd-1)*DilationD+1)) \% StrideD <= PadBack` .
7386
+ - :math:`(H_{in}+PadUp+PadDown - ((kh-1)*DilationH+1)) \% StrideH <= PadDown` .
7387
+ - :math:`stride_d <= kernel_d` .
7388
+ - :math:`PadUp < kh` and :math:`PadDown < kh` . When `padding` = ``'valid'``, both PadUp and PadDown are zeros.
7389
+ When `padding` = ``'same'``, pad can be calculated by
7390
+ :math:`floor(((H_{out}-1) * strideH + (kh - 1) * DilationH + 1 - H_{in}) / 2)` for the height dimension.
7391
+ The padding for the depth and width dimensions is calculated in a similar way, and the depth and width
7392
+ dimensions also have the same constraints.
7393
+ - :math:`((kh - 1) * DilationH - PadUp)` should be in [0, 255]. It is the same constraint for depth
7394
+ and width dimension.
7395
+ - If `padding` is ``'same'``, `stride` must be 1.
7396
+
7397
+ .. warning::
7398
+ This API does not support Atlas series products.
7399
+ This is an experimental API that is subject to change or deletion.
7400
+
7401
+ Args:
7402
+ input (Tensor): Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
7403
+ weight (Tensor): Set size of kernel is :math:`(kd, kh,
7404
+ kw)`, then the shape is :math:`(C_{out}, C_{in} / groups, kd, kh, kw)`.
7405
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
7406
+ When bias is ``None`` , zeros will be used. Default: ``None`` .
7407
+ stride (Union[int, tuple[int], list[int]], optional): The distance of kernel moving. It can be an int
7408
+ number that is used as the stride for the depth, height and width of movement, or a
7409
+ tuple of three int numbers that
7410
+ represent the depth, height and width of movement respectively. Default: ``1`` .
7411
+ padding (Union[int, tuple[int], list[int], str], optional): Implicit paddings on both sides of `input`.
7412
+ Can be a string, one integer or a tuple/list with 3 integers.
7413
+ If `padding` is a string, the optional values are ``"same"`` , ``"valid"``.
6807
7414
 
6808
- `pad_mode` is ``"valid"``:
7415
+ - same: Adopts the way of completion. The height and width of the output will be equal to
7416
+ the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
7417
+ left and right if possible. Otherwise, the last extra padding will be calculated from the bottom
7418
+ and the right side. If this mode is set, `stride` must be 1.
6809
7419
 
6810
- .. math::
6811
- \begin{array}{ll} \\
6812
- D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
6813
- {\text{stride[0]}} + 1} \right \rfloor \\
6814
- H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
6815
- {\text{stride[1]}} + 1} \right \rfloor \\
6816
- W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
6817
- {\text{stride[2]}} + 1} \right \rfloor \\
6818
- \end{array}
7420
+ - valid: Adopts the way of discarding. The possible largest height and width of output will be returned
7421
+ without padding. Extra pixels will be discarded.
6819
7422
 
6820
- `pad_mode` is ``"pad"``:
7423
+ If `padding` is one integer, the paddings of top, bottom, left and right are the same, equal to padding.
7424
+ If `padding` is a tuple/list with 3 integers, the padding of head, tail, top, bottom,
7425
+ left and right equal to pad[0], pad[0], pad[1], pad[1], pad[2] and pad[2] correspondingly. Default: ``0`` .
7426
+ dilation (Union[int, tuple[int], list[int]], optional): Controlling the space between the kernel points.
7427
+ Default: ``1`` .
7428
+ groups (int, optional): Splits `input` into groups. Default: ``1`` .
6821
7429
 
6822
- .. math::
6823
- \begin{array}{ll} \\
6824
- D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
6825
- \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
6826
- H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
6827
- \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
6828
- W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
6829
- \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
6830
- \end{array}
7430
+ Returns:
7431
+ Tensor, the same dtype as the `input`, with the shape :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
7432
+ or :math:`(C_{out}, D_{out}, H_{out}, W_{out})`.
6831
7433
 
6832
7434
  Raises:
6833
- TypeError: If `out_channel` or `groups` is not an int.
6834
7435
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
7436
+ TypeError: If `groups` is not an int.
6835
7437
  TypeError: If `bias` is not a Tensor.
6836
- ValueError: If the shape of `bias` is not :math:`(C_{out})`.
6837
- ValueError: If `stride` or `dilation` is less than 1.
6838
- ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
6839
- ValueError: If `padding` is a tuple or list whose length is not equal to 3.
6840
- ValueError: If `pad_mode` is not equal to 'pad' and `pad` is greater than 0.
6841
7438
 
6842
7439
  Supported Platforms:
6843
- ``Ascend`` ``GPU``
7440
+ ``Ascend``
6844
7441
 
6845
7442
  Examples:
6846
7443
  >>> import mindspore
6847
7444
  >>> import numpy as np
6848
- >>> from mindspore import Tensor, ops
6849
- >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
6850
- >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
6851
- >>> output = ops.conv3d(x, weight, pad_mode="same", padding=0, stride=1, dilation=1, groups=1)
6852
- >>> print(output.shape)
6853
- (16, 32, 10, 32, 32)
6854
- >>> output = ops.conv3d(x, weight, pad_mode="valid", padding=0, stride=1, dilation=1, groups=1)
6855
- >>> print(output.shape)
6856
- (16, 32, 7, 30, 30)
6857
- >>> output = ops.conv3d(x, weight, pad_mode="pad", padding=(2, 1, 1), stride=1, dilation=1, groups=1)
6858
- >>> print(output.shape)
6859
- (16, 32, 11, 32, 32)
7445
+ >>> from mindspore import mint
7446
+ >>> x = mindspore.Tensor(np.random.randn(12, 1, 60, 50, 8), mindspore.float16)
7447
+ >>> w = mindspore.Tensor(np.random.randn(26, 1, 2, 4, 4), mindspore.float16)
7448
+ >>> out = mint.nn.functional.conv3d(x, w)
7449
+ >>> print(out.shape)
7450
+ (12, 26, 59, 47, 5)
6860
7451
  """
6861
- weight_shape = weight.shape
6862
- out_channel = weight_shape[0]
6863
- kernel_size = weight_shape[2:5]
6864
- if isinstance(stride, (tuple, list)):
6865
- _check_conv_iterable_lengths(stride, dim=3, iter_name='stride')
6866
- if isinstance(dilation, (tuple, list)):
6867
- _check_conv_iterable_lengths(dilation, dim=3, iter_name='dilation')
6868
- input_shape = input.shape
6869
- in_channel = input_shape[1]
6870
- if not (in_channel % groups == 0 and out_channel % groups == 0):
6871
- raise ValueError("The argument 'groups' should be divisible by 'in_channel' " \
6872
- "and 'out_channel'")
6873
- if isinstance(padding, (list, tuple)):
6874
- padding = _manipulate_padding(padding, dim=3)
6875
- conv = _get_cache_prim(P.Conv3D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCDHW")
6876
- if bias is None:
6877
- return conv(input, weight)
6878
- if not isinstance(bias, Tensor):
6879
- raise TypeError(f"For 'conv3d', the 'bias' must be a Tensor, but got {type(bias)}.")
6880
- conv_result = conv(input, weight)
6881
- output = bias_add(conv_result, bias)
6882
- return output
7452
+
7453
+ if isinstance(padding, (tuple, list, int)):
7454
+ return conv3d_ext_op(input, weight, bias, stride, padding, dilation, groups)
7455
+ if isinstance(padding, str):
7456
+ return conv3d_padding_op(input, weight, bias, stride, padding, dilation, groups)
7457
+ raise TypeError(f"For conv3d, the parameter 'padding' must be a tuple/list " \
7458
+ f"or a string, but got {type(padding)}")
6883
7459
 
6884
7460
 
6885
7461
  @_primexpr
@@ -7062,6 +7638,50 @@ def glu(x, axis=-1):
7062
7638
  return x * y
7063
7639
 
7064
7640
 
7641
+ def glu_ext(input, dim=-1):
7642
+ r"""
7643
+ Computes GLU (Gated Linear Unit activation function) of the input tensor.
7644
+
7645
+ .. math::
7646
+ {GLU}(a, b)= a \otimes \sigma(b)
7647
+
7648
+ where :math:`a` is the first half of the `input` Tensor after `input` is split and :math:`b` is the second half.
7649
+
7650
+ Here :math:`\sigma` is the sigmoid function, and :math:`\otimes` is the Hadamard product.
7651
+ See `Language Modeling with Gated Convolutional Networks <https://arxiv.org/abs/1612.08083>`_.
7652
+
7653
+ Args:
7654
+ input (Tensor): Tensor to be calculated. Dtype is floating point and the shape
7655
+ is :math:`(\ast_1, N, \ast_2)` where `*` means, any number of additional dimensions. :math:`N`
7656
+ is required to be an even number, where :math:`N` is the size of `input` on the dimension
7657
+ selected by `dim`.
7658
+ dim (int, optional): The dimension to split the input `input`. The value range is `[-r, r)` where `r`
7659
+ is the number of dimensions of `input`. Default: ``-1`` , the last dimension in `input`.
7660
+
7661
+ Returns:
7662
+ Tensor, the same dtype as the input `input`. The shape is :math:`(\ast_1, M, \ast_2)` where :math:`M=N/2`.
7663
+
7664
+ Raises:
7665
+ TypeError: If `input` is not a Tensor or `dim` is not an int.
7666
+ IndexError: If the value of `dim` is out of the range of `[-r, r)`, where `r` is the number
7667
+ of dimensions of `input`.
7668
+ RuntimeError: If dtype of `input` is not supported.
7669
+ RuntimeError: If the length of `input` in the dimension selected by `dim` is not even.
7670
+
7671
+ Supported Platforms:
7672
+ ``Ascend`` ``CPU``
7673
+
7674
+ Examples:
7675
+ >>> from mindspore import Tensor, ops
7676
+ >>> input = Tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]])
7677
+ >>> output = ops.function.nn_func.glu_ext(input)
7678
+ >>> print(output)
7679
+ [[0.05744425 0.11973753]
7680
+ [0.33409387 0.41398472]]
7681
+ """
7682
+ return _get_cache_prim(P.GLU)(axis=dim)(input)
7683
+
7684
+
7065
7685
  def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean'):
7066
7686
  r"""
7067
7687
  Hinge loss for optimizing a multi-class classification.
@@ -7222,7 +7842,8 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
7222
7842
  Args:
7223
7843
  input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
7224
7844
  target (Tensor): The label target Tensor which has the same shape as `input`.
7225
- weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: ``None``.
7845
+ weight (Union[Tensor, int, float], optional): The manual rescaling weight given to each class.
7846
+ Default: ``None``.
7226
7847
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
7227
7848
  ``'sum'`` . Default: ``'mean'`` .
7228
7849
 
@@ -7291,7 +7912,8 @@ def gelu(input, approximate='none'):
7291
7912
 
7292
7913
  Args:
7293
7914
  input (Tensor): The input of the activation function GeLU, the data type is float16, float32 or float64.
7294
- approximate (str): the gelu approximation algorithm to use. Acceptable vaslues are ``'none'`` and ``'tanh'`` .
7915
+ approximate (str, optional): the gelu approximation algorithm to use.
7916
+ Acceptable values are ``'none'`` and ``'tanh'`` .
7295
7917
  Default: ``'none'`` .
7296
7918
 
7297
7919
  Returns:
@@ -7309,7 +7931,7 @@ def gelu(input, approximate='none'):
7309
7931
  >>> import mindspore
7310
7932
  >>> from mindspore import Tensor, ops
7311
7933
  >>> x = Tensor([1.0, 2.0, 3.0], mindspore.float32)
7312
- >>> result = ops.gelu(x)
7934
+ >>> result = ops.gelu(x, approximate='none')
7313
7935
  >>> print(result)
7314
7936
  [0.8413447 1.9544997 2.9959505]
7315
7937
  """
@@ -7334,33 +7956,23 @@ def gelu(input, approximate='none'):
7334
7956
  def channel_shuffle(x, groups):
7335
7957
  r"""
7336
7958
  Divide the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` groups and
7337
- rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while keeping the original tensor shapes.
7959
+ rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while retaining the original tensor
7960
+ shape in the final output.
7338
7961
 
7339
7962
  Args:
7340
- x (Tensor): Tensor to be divided, it has shape :math:`(*, C, H, W)`,
7341
- with float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 data type.
7963
+ x (Tensor): The input tensor.
7342
7964
  groups (int): Number of groups to divide channels in.
7343
7965
 
7344
7966
  Returns:
7345
- A Tensor, has the same type as the `x`, and has the shape :math:`(*, C, H, W)`.
7346
-
7347
- Raises:
7348
- TypeError: If data type of `x` is not one of the following:
7349
- float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64.
7350
- TypeError: If dim of `x` is < 4.
7351
- TypeError: If `groups` is not a positive number.
7352
- ValueError: If channel number of `x` is not divisible by `groups`.
7967
+ Tensor
7353
7968
 
7354
7969
  Supported Platforms:
7355
7970
  ``Ascend`` ``CPU``
7356
7971
 
7357
7972
  Examples:
7358
7973
  >>> import mindspore
7359
- >>> import numpy as np
7360
- >>> from mindspore import Tensor, ops
7361
- >>> group = 2
7362
- >>> x = Tensor(np.arange(1* 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16))
7363
- >>> y = mindspore.ops.channel_shuffle(x, group)
7974
+ >>> x = mindspore.tensor(mindspore.ops.arange(0, 16, dtype=mindspore.int16).reshape(1, 4, 2, 2))
7975
+ >>> y = mindspore.ops.channel_shuffle(x, groups=2)
7364
7976
  >>> print(y)
7365
7977
  [[[[ 0 1]
7366
7978
  [ 2 3]]
@@ -7550,6 +8162,96 @@ def lp_pool2d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
7550
8162
  return ((sign(out) * ops.relu(ops.abs(out))) * (kernel_size[0] * kernel_size[1])).pow(1.0 / norm_type)
7551
8163
 
7552
8164
 
8165
+ def relu(input, inplace=False):
8166
+ r"""
8167
+ Computes ReLU (Rectified Linear Unit activation function) of input tensors element-wise.
8168
+
8169
+ It returns :math:`\max(input,\ 0)` element-wise. Specially, the neurons with the negative output
8170
+ will be suppressed and the active neurons will stay the same.
8171
+
8172
+ .. math::
8173
+
8174
+ ReLU(input) = (input)^+ = \max(0, input)
8175
+
8176
+ ReLU Activation Function Graph:
8177
+
8178
+ .. image:: ../images/ReLU.png
8179
+ :align: center
8180
+
8181
+ Args:
8182
+ input (Tensor): The input Tensor.
8183
+ inplace (bool, optional): Whether to use inplace mode. Default: ``False`` .
8184
+
8185
+ Returns:
8186
+ Tensor, with the same dtype and shape as the `input`.
8187
+
8188
+ Raises:
8189
+ TypeError: If dtype of `input` is not Number type.
8190
+ TypeError: If `input` is not a Tensor.
8191
+
8192
+ Supported Platforms:
8193
+ ``Ascend`` ``GPU`` ``CPU``
8194
+
8195
+ Examples:
8196
+ >>> import mindspore
8197
+ >>> import numpy as np
8198
+ >>> from mindspore import Tensor, ops
8199
+ >>> input = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
8200
+ >>> output = ops.relu(input)
8201
+ >>> print(output)
8202
+ [[0. 4. 0.]
8203
+ [2. 0. 9.]]
8204
+ """
8205
+ if inplace:
8206
+ return inplace_relu_op(input)
8207
+ return relu_op(input)
8208
+
8209
+
8210
+ def relu_(input):
8211
+ r"""
8212
+ Computes ReLU (Rectified Linear Unit activation function) of input tensors element-wise, in place.
8213
+
8214
+ It returns :math:`\max(input,\ 0)` element-wise. Specially, the neurons with the negative output
8215
+ will be suppressed and the active neurons will stay the same.
8216
+
8217
+ .. math::
8218
+
8219
+ ReLU(input) = (input)^+ = \max(0, input)
8220
+
8221
+ ReLU Activation Function Graph:
8222
+
8223
+ .. image:: ../images/ReLU.png
8224
+ :align: center
8225
+
8226
+ .. warning::
8227
+ This is an experimental API that is subject to change or deletion.
8228
+
8229
+ Args:
8230
+ input (Tensor): The input Tensor.
8231
+
8232
+ Returns:
8233
+ Tensor, with the same dtype and shape as the `input`.
8234
+
8235
+ Raises:
8236
+ TypeError: If dtype of `input` is not Number type.
8237
+ TypeError: If `input` is not a Tensor.
8238
+
8239
+ Supported Platforms:
8240
+ ``Ascend``
8241
+
8242
+ Examples:
8243
+ >>> import mindspore
8244
+ >>> import numpy as np
8245
+ >>> from mindspore import Tensor, ops
8246
+ >>> input = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
8247
+ >>> ops.relu_(input)
8248
+ >>> print(input)
8249
+ [[0. 4. 0.]
8250
+ [2. 0. 9.]]
8251
+ """
8252
+ return inplace_relu_op(input)
8253
+
8254
+
7553
8255
  def mse_loss(input, target, reduction='mean'):
7554
8256
  r"""
7555
8257
  Calculates the mean squared error between the predicted value and the label value.
@@ -7623,35 +8325,36 @@ def mse_loss(input, target, reduction='mean'):
7623
8325
 
7624
8326
  def msort(input):
7625
8327
  r"""
7626
- Sorts the elements in Tensor in ascending order of value along its first dimension.
7627
-
7628
- ops.msort(t) is equivalent to ops.Sort(axis=0)(t)[0]. See also :class:`mindspore.ops.Sort()`.
8328
+ Return a tensor obtained by sorting the input tensor in ascending order along its first dimension.
7629
8329
 
7630
- .. Note::
7631
- The Ascend backend only supports sorting the 1D input.
8330
+ `ops.msort(input)` is equivalent to `ops.Sort(axis=0)(input)[0]`. See also :class:`mindspore.ops.Sort()` for more
8331
+ details.
7632
8332
 
7633
8333
  Args:
7634
- input (Tensor): The input to sort, with float16 or float32 data type.
8334
+ input (Tensor): The input tensor to sort.
7635
8335
 
7636
8336
  Returns:
7637
- A tensor whose values are the sorted values, with the same shape and data type as input.
7638
-
7639
- Raises:
7640
- TypeError: If dtype of `input` is neither float16 nor float32.
8337
+ Tensor
7641
8338
 
7642
8339
  Supported Platforms:
7643
8340
  ``Ascend`` ``GPU`` ``CPU``
7644
8341
 
7645
8342
  Examples:
7646
- >>> import mindspore as ms
7647
- >>> from mindspore import ops
7648
- >>> import numpy as np
7649
- >>> input = ms.Tensor(np.array([[8, 2, 1], [5, 9, 3], [4, 6, 7]]), ms.float16)
7650
- >>> output = ops.msort(input)
7651
- >>> print(output)
7652
- [[4. 2. 1.]
7653
- [5. 6. 3.]
7654
- [8. 9. 7.]]
8343
+ >>> import mindspore
8344
+ >>> input = mindspore.tensor([[8, 2, 1],
8345
+ ... [5, 9, 3],
8346
+ ... [4, 6, 7]])
8347
+ >>> mindspore.ops.msort(input)
8348
+ Tensor(shape=[3, 3], dtype=Int64, value=
8349
+ [[4, 2, 1],
8350
+ [5, 6, 3],
8351
+ [8, 9, 7]])
8352
+ >>> # is equivalent to `ops.sort(input, axis=0)[0]`
8353
+ >>> mindspore.ops.sort(input, axis=0)[0]
8354
+ Tensor(shape=[3, 3], dtype=Int64, value=
8355
+ [[4, 2, 1],
8356
+ [5, 6, 3],
8357
+ [8, 9, 7]])
7655
8358
  """
7656
8359
  return ops.Sort(axis=0)(input)[0]
7657
8360
 
@@ -7667,7 +8370,8 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
7667
8370
  as `anchor`.
7668
8371
  negative (Tensor): A sample belonging to the different class from `anchor`, with the same type and shape
7669
8372
  as `anchor`.
7670
- margin (float, optional): Make a margin between the positive pair and the negative pair. Default: ``1.0`` .
8373
+ margin (float, optional): Make a margin between the positive pair and the negative pair. The shape of margin
8374
+ must be 0. Default: ``1.0`` .
7671
8375
  p (int, optional): The degree of norm for pairwise distance. Default: ``2`` .
7672
8376
  eps (float, optional): Add small value to avoid division by zero. Default: ``1e-06``.
7673
8377
  swap (bool, optional): The distance swap change the negative distance to the distance between positive
@@ -8131,7 +8835,7 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
8131
8835
  return out
8132
8836
 
8133
8837
 
8134
- def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ceil_mode=False, return_indices=False):
8838
+ def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False):
8135
8839
  r"""
8136
8840
  Performs a 2D max pooling on the input Tensor.
8137
8841
 
@@ -8153,21 +8857,23 @@ def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ce
8153
8857
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
8154
8858
  value, is an int number that represents height and width of the kernel, or a tuple of
8155
8859
  two int numbers that represent height and width respectively.
8156
- stride (Union[int, tuple[int], None]): The distance of kernel moving, an int number that represents
8860
+ stride (Union[int, tuple[int], None], optional): The distance of kernel moving, an int number that represents
8157
8861
  the height and width of movement are both stride, or a tuple of two int numbers that
8158
8862
  represent height and width of movement respectively.
8159
8863
  Default: ``None`` , which indicates the moving step is `kernel_size` .
8160
- padding (Union[int, tuple[int]]): An int number that represents the height and width of movement are both
8864
+ padding (Union[int, tuple[int]], optional):
8865
+ An int number that represents the height and width of movement are both
8161
8866
  strides, or a tuple of two int numbers that represent height and width of movement respectively.
8162
8867
  Default: ``0`` .
8163
- dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: ``1`` .
8164
- ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
8165
- return_indices (bool): Whether to output the indices of max value. Default: ``False`` .
8868
+ dilation (Union[int, tuple[int]], optional): Control the stride of elements in the kernel. Default: ``1`` .
8869
+ ceil_mode (bool, optional): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
8870
+ return_indices (bool, optional): Whether to output the indices of max value. Default: ``False`` .
8166
8871
 
8167
8872
  Returns:
8168
8873
  If `return_indices` is ``False`` , return a Tensor `output`, else return a tuple (`output`, `argmax`).
8169
8874
 
8170
- - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
8875
+ - **output** (Tensor) - Maxpooling result,
8876
+ with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
8171
8877
  It has the same data type as `input`.
8172
8878
 
8173
8879
  .. math::
@@ -8198,10 +8904,9 @@ def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ce
8198
8904
  >>> import mindspore
8199
8905
  >>> import numpy as np
8200
8906
  >>> from mindspore import Tensor, ops
8201
- >>> from mindspore.ops.function.nn_func import max_pool2d_ext
8202
8907
  >>> input = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
8203
- >>> output_tensor, argmax = max_pool2d_ext(input, kernel_size=(3, 2), stride=(2, 1),
8204
- ceil_mode=False, return_indices=True)
8908
+ >>> output_tensor, argmax = ops.function.nn_func.max_pool2d_ext(input, kernel_size=(3, 2), stride=(2, 1),
8909
+ ... ceil_mode=False, return_indices=True)
8205
8910
  >>> print(output_tensor.shape)
8206
8911
  (20, 16, 24, 31)
8207
8912
  >>> print(argmax.shape)
@@ -8219,62 +8924,199 @@ def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, *, ce
8219
8924
  return out
8220
8925
 
8221
8926
 
8222
- def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift,
8223
- deq_scale1, quant_scale1, deq_scale2, quant_scale2, quant_offset2, num_heads,
8224
- scale_value=1.0, pre_tokens=2147483547, next_tokens=0, input_layout='BSH',
8225
- num_key_value_heads=0, sparse_mode=0, inner_precise=1):
8927
+ def prompt_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=None, actual_seq_lengths_kv=None,
8928
+ pse_shift=None, deq_scale1=None, quant_scale1=None, deq_scale2=None, quant_scale2=None,
8929
+ quant_offset2=None, num_heads=1, scale_value=1.0, pre_tokens=2147483647, next_tokens=0,
8930
+ input_layout='BSH', num_key_value_heads=0, sparse_mode=0, inner_precise=1):
8226
8931
  r"""
8227
8932
  The interface for full inference.
8228
- B -- Batch size
8229
- S -- Sequence length
8230
- H -- Hidden size
8933
+
8934
+ - B: Batch size
8935
+ - N: Num of attention heads
8936
+ - S: Sequence length
8937
+ - D: Head dim
8938
+ - H: Hidden layer size
8939
+
8940
+ Self attention constructs an attention model based on the relationship between input samples themselves. The
8941
+ principle is to assume that there is an input sample sequence :math:`x` of length :math:`n`, and each
8942
+ element of :math:`x` is a :math:`d` dimensional vector, which can be viewed as a token embedding. This sequence
8943
+ can be transformed through 3 weight matrices to obtain 3 matrices with dimensions of :math:`n\times d`.
8944
+
8945
+ The self attention calculation formula is defined as:
8946
+
8947
+ .. math::
8948
+ Attention(Q,K,V)=Softmax(\frac{QK^{T} }{\sqrt{d} } )V
8949
+
8950
+ where the product of :math:`Q` and :math:`K^{T}` represents the attention of input :math:`x`. To avoid the value
8951
+ becoming too large, it is usually scaled by dividing it by the square root of :math:`d`, and softmax
8952
+ normalization is performed on each row, yielding a matrix of :math:`n\times d` after multiplying by :math:`V`.
8953
+
8954
+ .. warning::
8955
+ - Support for the float16 dtype of `attn_mask` will be deprecated in the future.
8956
+ - When `sparse_mode` is 2, 3 or 4, the shape of `attn_mask` must be :math:`(2048, 2048)` /
8957
+ :math:`(B, 1, 2048, 2048)` / :math:`(1, 1, 2048, 2048)`.
8231
8958
 
8232
8959
  Note:
8233
- experiment ops
8960
+ - Maximum Support for each axis
8961
+
8962
+ - Supports B-axis values less than or equal to 65536 (64k).
8963
+ When the input type includes int8 with D-axis not aligned to 32, or the input type is
8964
+ float16 or bfloat16 with D-axis not aligned to 16, the B-axis supports up to 128 only.
8965
+ - Supports N-axis values less than or equal to 256.
8966
+ - Supports S-axis values less than or equal to 20971520 (20M).
8967
+ - Supports D-axis values less than or equal to 512.
8968
+
8969
+ - Quantization
8970
+
8971
+ - int8 Input, int8 Output: Parameters `deq_scale1`, `quant_scale1`, `deq_scale2`, and `quant_scale2`
8972
+ must all be provided. `quant_offset2` is optional (default is 0 if not provided).
8973
+ - int8 Input, float16 Output: Parameters `deq_scale1`, `quant_scale1`, and `deq_scale2` must all be provided.
8974
+ If `quant_offset2` or `quant_scale2` is provided (i.e., not null), it will result in an error.
8975
+ - float16 or bfloat16 Input, int8 Output: Parameter `quant_scale2` must be provided. `quant_offset2` is
8976
+ optional (default is 0 if not provided). If `deq_scale1`, `quant_scale1`, or `deq_scale2` is
8977
+ provided (i.e., not null), it will result in an error.
8978
+ - int8 Output:
8979
+
8980
+ - `quant_scale2` and `quant_offset2` in per-channel format do not support scenarios with
8981
+ left padding, Ring Attention, or non-32-byte aligned D-axis.
8982
+ - In GE mode: `quant_scale2` and `quant_offset2` in per-tensor format do not support scenarios
8983
+ with non-32-byte aligned D-axis.
8984
+ - Does not support sparse as band and `pre_tokens`/`next_tokens` being negative.
8985
+
8986
+ - `quant_scale2` and `quant_offset2` can be bfloat16 only when `query` is bfloat16.
8987
+
8988
+
8989
+ - Other Usage Caveats:
8990
+
8991
+ - :math:`N` of parameter `query` must be equal to `num_heads`. :math:`N` of parameter `key` and parameter
8992
+ `value` must be equal to `num_key_value_heads`.
8993
+ - `num_heads` must be divisible by `num_key_value_heads` and `num_heads` divided by `num_key_value_heads`
8994
+ can not be greater than 64.
8995
+ - When `query` dtype is bfloat16, D axis should align with 16.
8996
+ - Each element of `actual_seq_lengths` must not exceed q_S and element
8997
+ of `actual_seq_lengths_kv` must not exceed kv_S.
8234
8998
 
8235
8999
  .. warning::
8236
- This is an experimental API that is subject to change or deletion.
9000
+ Only supported on Atlas A2 training series.
9001
+
9002
+ Args:
9003
+ query (Tensor): The query tensor with data type of int8, float16 or bfloat16.
9004
+ The shape is :math:`(B, q_S, q_H)` / `(B, q_N, q_S, q_D)`.
9005
+ key (Tensor): The key tensor with the same dtype as `query`.
9006
+ The shape is :math:`(B, kv_S, kv_H)` / `(B, kv_N, kv_S, kv_D)`.
9007
+ value (Tensor): The value tensor with the same dtype as `query`.
9008
+ The shape is :math:`(B, kv_S, kv_H)` / `(B, kv_N, kv_S, kv_D)`.
9009
+ attn_mask (Tensor, optional): The attention mask tensor with data type of bool, int8, uint8 or float16.
9010
+ For each element, 0/False indicates retention and 1/True indicates discard.
9011
+ If `sparse_mode` is 0 or 1: the shape is :math:`(q_S, kv_S)` / :math:`(B, q_S, kv_S)` /
9012
+ :math:`(1, q_S, kv_S)` / :math:`(B, 1, q_S, kv_S)` / :math:`(1, 1, q_S, kv_S)`.
9013
+ If `sparse_mode` is 2, 3 or 4, the shape is :math:`(2048, 2048)` / :math:`(1, 2048, 2048)` /
9014
+ :math:`(1, 1, 2048, 2048)`.
9015
+ Default: ``None``.
9016
+ actual_seq_lengths (Union[Tensor, tuple[int], list[int]], optional): Describe actual sequence length of each
9017
+ batch of `query` with data type of int64. The shape is :math:`(B, )` and every element should be
9018
+ positive integer.
9019
+ Default: ``None``.
9020
+ actual_seq_lengths_kv (Union[Tensor, tuple[int], list[int]], optional): Describe actual sequence length of each
9021
+ batch of `key` or `value` with data type of int64. The shape is :math:`(B, )` and every element should be
9022
+ positive integer.
9023
+ Default: ``None``.
9024
+ pse_shift (Tensor, optional): The position encoding tensor with data type of float16 or bfloat16.
9025
+ Input tensor of shape :math:`(B, N, q_S, kv_S)` / :math:`(1, N, q_S, kv_S)`.
9026
+ Default: ``None``.
8237
9027
 
8238
- Inputs:
8239
- query (Tensor) - The query tensor with data type of float16 or float32.
8240
- Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
8241
- key (Tensor) - The key tensor with data type of float16 or float32.
8242
- Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
8243
- value (Tensor) - The value tensor with data type of float16 or float32.
8244
- Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
8245
- attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
8246
- For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
8247
- actual_seq_lengths (Tensor): Describe actual sequence length of each input with data type of int64.
8248
- actual_seq_lengths_kv (Tensor): Describe actual sequence length of each input with data type of int64.
8249
- pse_shift (Tensor) - The position encoding tensor with data type of float16 or float32.
8250
- dep_scale1 (Tensor)
8251
- quant_scale1 (Tensor)
8252
- deq_scale2 (Tensor)
8253
- quant_scale2 (Tensor)
8254
- quant_offset2 (Tensor)
8255
- num_heads (int): The number of heads.
8256
- scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
8257
- Muls in the calculation. Default: 1.0.
8258
- pre_tokens (int): Previous tokens. Default: 2147483547.
8259
- next_tokens (int): next tokens. Default: 0.
8260
- indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
8261
- indicates that the data blocks in the upper triangle are not involved in the calculation
8262
- input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
8263
- num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
8264
- The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
8265
- sparse_mode (int): Default: 0
8266
- inner_precise (int): 0, float16 high precision. 1, high performance. default 1
9028
+ - q_S must be greater than or equal to the query's S length, and kv_S must be greater than or
9029
+ equal to the key's S length.
9030
+ - If `pse_shift` has dtype float16, `query` should have dtype float16 or int8, in which case high
9031
+ precision mode is enabled automatically.
9032
+ - If `pse_shift` has dtype bfloat16, `query` should have dtype bfloat16.
9033
+
9034
+ deq_scale1 (Tensor, optional): Quantization parameter, the tensor with data type of uint64 or float32.
9035
+ Input Tensor of shape :math:`(1,)`.
9036
+ Default: ``None``.
9037
+ quant_scale1 (Tensor, optional): Quantization parameter, the tensor with data type of float32.
9038
+ Input Tensor of shape :math:`(1,)`.
9039
+ Default: ``None``.
9040
+ deq_scale2 (Tensor, optional): Quantization parameter, input Tensor of shape :math:`(1,)` and it has
9041
+ the same dtype as `deq_scale1`.
9042
+ Default: ``None``.
9043
+ quant_scale2 (Tensor, optional): Quantization parameter, the tensor with data type of float32 or bfloat16.
9044
+ The suggested shape is :math:`(1,)` / :math:`(1, 1, q_H)` / :math:`(q_H, )` when output layout is BSH,
9045
+ :math:`(1,)` / :math:`(1, q_N, 1, D)` / :math:`(q_N, D)` when layout is BNSD.
9046
+ Default: ``None``.
9047
+ quant_offset2 (Tensor, optional): Quantization parameter, the tensor with data type of float32 or bfloat16.
9048
+ It has the same dtype and shape as `quant_scale2`.
9049
+ Default: ``None``.
9050
+ num_heads (int, optional): The number of heads. It is an integer in range (0, 256].
9051
+ Default: ``1``.
9052
+ scale_value (double, optional): The scale value indicating the scale coefficient, which is used as the scalar of
9053
+ Muls in the calculation.
9054
+ Default: ``1.0``.
9055
+ pre_tokens (int, optional): For sparse computing, indicating the number of previous tokens Attention needs to
9056
+ be associated with.
9057
+ Default: ``2147483647``.
9058
+ next_tokens (int, optional): For sparse computing, indicating the number of next tokens Attention needs to
9059
+ be associated with.
9060
+ Default: ``0``.
9061
+ input_layout (str, optional): the data layout of the input qkv, support `(BSH)` and `(BNSD)`.
9062
+ Default: ``BSH``.
9063
+ num_key_value_heads (int, optional): An int indicates head numbers of ``key``/``value`` which are used
9064
+ in GQA algorithm. The value 0 indicates if the key and value have the same head nums, use `num_heads`.
9065
+ If it is specified (not 0), it must be a factor of `num_heads` and it must be equal to kv_n.
9066
+ Default: ``0``.
9067
+ sparse_mode (int, optional): An int specifies sparse mode, can be int from {0, 1, 2, 3, 4}.
9068
+ Default: ``0``.
9069
+
9070
+ - `sparse_mode` = 0: If `attn_mask` is ``None``, `pre_tokens` and `next_tokens` inputs are ignored
9071
+ (internally set to INT_MAX).
9072
+ - `sparse_mode` = 2, 3, 4: `attn_mask` shape must be :math:`(S, S)` or :math:`(1, S, S)` or
9073
+ :math:`(1, 1, S, S)`, with S fixed at 2048. User must ensure that `attn_mask` is lower triangular.
9074
+ If it is not provided or has an incorrect shape, it will result in an error.
9075
+ - `sparse_mode` = 1, 2, 3: Ignores `pre_tokens`, `next_tokens` inputs and sets values according
9076
+ to specific rules.
9077
+ - `sparse_mode` = 4: `pre_tokens` and `next_tokens` must be non-negative.
9078
+
9079
+ inner_precise (int, optional): An int number from {0, 1} indicates computing mode.
9080
+ ``0`` for high precision mode for float16 dtype. ``1`` for high performance mode.
9081
+ Default: ``1``.
8267
9082
 
9083
+ Returns:
9084
+ attention_out (Tensor) - Output tensor, has the same shape as `query` of
9085
+ :math:`(B, q_S, q_H)` / :math:`(B, q_N, q_S, q_D)`.
9086
+ Output dtype is determined by multiple factors, please refer to Note above for details.
8268
9087
 
8269
- Outputs:
8270
- attention_out (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
9088
+ Raises:
9089
+ TypeError: Dtype of `query` is not int8, float16 or bfloat16.
9090
+ TypeError: `query`, `key` and `value` don't have the same dtype.
9091
+ TypeError: Dtype of `attn_mask` is not bool, int8 or uint8.
9092
+ TypeError: Dtype of `pse_shift` is not bfloat16 or float16.
9093
+ TypeError: `scale_value` is not a double number.
9094
+ TypeError: `input_layout` is not a string.
9095
+ TypeError: `num_key_value_heads` is not an int.
9096
+ TypeError: `sparse_mode` is not an int.
9097
+ TypeError: `inner_precise` is not an int.
9098
+ TypeError: `quant_scale1` is not Tensor of type float32.
9099
+ TypeError: `deq_scale1` is not Tensor of type uint64 or float32.
9100
+ TypeError: `quant_scale2` is not Tensor of type float32.
9101
+ TypeError: `deq_scale2` is not Tensor of type uint64 or float32.
9102
+ TypeError: `quant_offset2` is not Tensor of type float32.
9103
+ ValueError: `input_layout` is a string but not `(BSH)` or `(BNSD)`.
9104
+ RuntimeError: `num_heads` is not divisible by `num_key_value_heads`.
9105
+ RuntimeError: `num_heads` is not greater than 0.
9106
+ RuntimeError: `num_key_value_heads` is not greater than or equal to 0.
9107
+ RuntimeError: kv_n is not equal to `num_key_value_heads`.
9108
+ RuntimeError: `attn_mask` shape is not valid.
9109
+ RuntimeError: `sparse_mode` is specified but is not 0, 1, 2, 3 or 4.
9110
+ RuntimeError: `query` dtype is bfloat16 and D axis is not aligned with 16.
9111
+ RuntimeError: `input_layout` is BSH and kv_h is not divisible by `num_key_value_heads`.
9112
+ RuntimeError: D-axis of `query`, `key` and `value` is not the same.
9113
+ RuntimeError: In post quant per-channel scenario, D-axis is not 32 Byte aligned.
8271
9114
 
8272
- Supported Platforms:
9115
+ Supported Platforms:
8273
9116
  ``Ascend``
8274
9117
 
8275
9118
  Examples:
8276
- >>> from mindspore.ops.function.nn_func import prompt_flash_attention
8277
- >>> from mindspore import Tensor
9119
+ >>> from mindspore import Tensor, ops
8278
9120
  >>> import numpy as np
8279
9121
  >>> B = 1
8280
9122
  >>> N = 16
@@ -8284,13 +9126,13 @@ def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths, act
8284
9126
  >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
8285
9127
  >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
8286
9128
  >>> out = ops.prompt_flash_attention(query, key, value, None, None, None, None, None, None, None, None,
8287
- None, N, input_layout='BNSD')
9129
+ ... None, N, input_layout='BNSD')
8288
9130
  >>> print(out.shape)
8289
9131
  (1, 16, 256, 16)
8290
9132
  """
8291
9133
 
8292
- pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
8293
- num_key_value_heads, sparse_mode, inner_precise)
9134
+ pfa = _get_cache_prim(PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
9135
+ num_key_value_heads, sparse_mode, inner_precise)
8294
9136
  return pfa(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift, deq_scale1,
8295
9137
  quant_scale1, deq_scale2, quant_scale2, quant_offset2)
8296
9138
 
@@ -8301,21 +9143,17 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8301
9143
  num_heads=1, input_layout='BSH', scale_value=1.0, num_key_value_heads=0,
8302
9144
  block_size=0, inner_precise=1, kv_padding_size=None):
8303
9145
  r"""
8304
- B -- Batch size
8305
-
8306
- N -- Num heads
8307
-
8308
- kvN -- Num key value heads
8309
-
8310
- S -- Sequence length
9146
+ The interface for incremental inference.
8311
9147
 
8312
- D -- Head dim
9148
+ - B: Batch size
9149
+ - N: Num of attention heads
9150
+ - kvN: Num of `key` / `value` heads
9151
+ - S: Sequence length
9152
+ - D: Head dim
9153
+ - H: Hidden layer size
9154
+ - kvH: Hidden size of `key` / `value`
8313
9155
 
8314
- H -- Hidden size
8315
-
8316
- kvH -- Hidden size of key value
8317
-
8318
- where :math:`H=N\times D`, :math:`kvH=kvN\times D`
9156
+ where :math:`H=N\times D`, :math:`kvH=kvN\times D`.
8319
9157
 
8320
9158
  Self attention constructs an attention model based on the relationship between input samples themselves. The
8321
9159
  principle is to assume that there is a length of the input sample sequence :math:`x` of :math:`n`, and each
@@ -8330,62 +9168,62 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8330
9168
  becoming too large, it is usually scaled by dividing it by the square root of :math:`d` and perform softmax
8331
9169
  normalization on each row, yields a matrix of :math:`n\times d` after multiplying :math:`V`.
8332
9170
 
8333
- .. warning::
8334
- This is an experimental API that is subject to change or deletion.
8335
-
8336
9171
  Note:
8337
- - If there is no input parameter and no default value, None needs to be passed.
8338
- - The shape of the tensor corresponding to the key and value parameters needs to be completely consistent.
8339
- - :math:`N` of parameter query is equal with num_heads. :math:`N` of parameter key and parameter value is equal
8340
- with num_key_value_heads. num_heads is a multiple of num_key_value_heads.
9172
+ - If there is no input parameter and no default value, ``None`` needs to be passed.
9173
+ - The shape of the tensor corresponding to the `key` and `value` parameters needs to be completely consistent.
9174
+ - :math:`N` of parameter `query` is equal to `num_heads`. :math:`N` of parameter `key` and parameter `value`
9175
+ is equal to `num_key_value_heads`. `num_heads` is a multiple of `num_key_value_heads`.
8341
9176
  - Quantization
8342
9177
 
8343
- - When the data type of query, key, and value is float16 and the data type of output is int8, the input
8344
- parameter quant_scale2 is required and quant_offset2 is optional.
8345
- - When antiquant_scale exists, key and value need to be passed by int8. antiquant_offset is optional.
8346
- - The data type of antiquant_scale and antiquant_offset should be consistency with that of query.
8347
- - pse_shift
9178
+ - When the data type of `query`, `key`, and `value` is float16 and the data type of output is int8, the input
9179
+ parameter `quant_scale2` is required and `quant_offset2` is optional.
9180
+ - When `antiquant_scale` exists, `key` and `value` need to be passed by int8. `antiquant_offset` is optional.
9181
+ - The data type of `antiquant_scale` and `antiquant_offset` should be consistent with that of `query`.
9182
+ - `pse_shift`
8348
9183
 
8349
- - The pse_shift data type needs to be consistent with the query data type, and only supports D-axis alignment,
9184
+ - The `pse_shift` data type needs to be consistent with `query`, and only supports D-axis alignment,
8350
9185
  which means that the D-axis can be divided by 16.
8351
9186
  - Page attention:
8352
9187
 
8353
- - The necessary condition for enabling page attention is that the block_table exists, and the key
8354
- and value are arranged in a contiguous memory according to the index in the block_table. The support for
8355
- key and value dtypes is float16/bfloat16/int8.
8356
- - In the enabling scenario of page attention, 16 alignment is required when input types of key and value are
8357
- float16/bfloat16, and 32 alignment is required when input types of key and value are int8. It is
8358
- recommended to use 128.
9188
+ - The necessary condition for enabling page attention is that the `block_table` exists, and the `key`
9189
+ and `value` are arranged in a contiguous memory according to the index in the `block_table`. The support
9190
+ dtype for `key` and `value` is float16/bfloat16/int8.
9191
+ - In the enabling scenario of page attention, 16 alignment is required when input types of `key`
9192
+ and `value` are float16/bfloat16, and 32 alignment is required when input dtype of `key` and `value`
9193
+ is int8. It is recommended to use 128.
8359
9194
  - The maximum max_block_num_per_seq currently supported by blocktable is 16k, and exceeding 16k will result
8360
9195
  in interception and error messages; If you encounter :math:`S` being too large and causing
8361
- max_block_num_per_seq to exceed 16k, you can increase the block_size to solve the problem.
8362
- - The multiplication of all dimensions of the shape of the parameters key and value in the page attention
9196
+ max_block_num_per_seq to exceed 16k, you can increase the `block_size` to solve the problem.
9197
+ - The multiplication of all dimensions of the shape of the parameters `key` and `value` in the page attention
8363
9198
  scenario cannot exceed the representation range of int32.
8364
9199
  - When performing per-channel post quantization, page attention cannot be enabled simultaneously.
8365
- - kv_padding_size:
9200
+ - `kv_padding_size`:
8366
9201
 
8367
9202
  - The calculation formula for the starting point of KV cache transfer is
8368
9203
  :math:`S-kv\_padding\_size-actual\_seq\_lengths`. The calculation formula for the transfer endpoint of KV
8369
9204
  cache is :math:`S-kv\_padding\_size`. When the starting or ending point of the KV cache transfer is less
8370
9205
  than 0, the returned data result is all 0.
8371
- - When kv_padding_size is less than 0, it will be set to 0.
8372
- - kv_padding_size needs to be enabled together with the actual_seq_lengths parameter, otherwise it is
9206
+ - When `kv_padding_size` is less than 0, it will be set to 0.
9207
+ - `kv_padding_size` needs to be enabled together with the `actual_seq_lengths` parameter, otherwise it is
8373
9208
  considered as the KV right padding scene.
8374
9209
  - It needs to be enabled together with the `attn_mask` parameter and ensure that the meaning of `attn_mask` is
8375
9210
  correct, that is, it can correctly hide invalid data. Otherwise, it will introduce accuracy issues.
8376
- - kv_padding_size does not support page attention scenarios
9211
+ - `kv_padding_size` does not support page attention scenarios.
9212
+
9213
+ .. warning::
9214
+ Only supported on Atlas A2 training series.
8377
9215
 
8378
9216
  Args:
8379
9217
  query (Tensor): The query tensor with data type of float16 or bfloat16.
8380
9218
  The shape is :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
8381
- key (TensorList): The key tensor with data type of float16 or bfloat16 or int8.
9219
+ key (Union[tuple, list]): The key tensor with data type of float16 or bfloat16 or int8.
8382
9220
  The shape is :math:`(B, S, kvH)` / :math:`(B, kvN, S, D)`.
8383
- value (TensorList): The value tensor with data type of float16 or bfloat16 or int8.
9221
+ value (Union[tuple, list]): The value tensor with data type of float16 or bfloat16 or int8.
8384
9222
  The shape is :math:`(B, S, kvH)` / :math:`(B, kvN, S, D)`.
8385
9223
  attn_mask (Tensor, optional): The attention mask tensor with data type of bool or int8 or uint8.
8386
9224
  The shape is :math:`(B, S)` / :math:`(B, 1, S)` / :math:`(B, 1, 1, S)`. Default: ``None``.
8387
9225
  actual_seq_lengths (Union[Tensor, tuple[int], list[int]], optional): Describe actual sequence length of each
8388
- input with data type of int32 or int64. The shape is :math:`(B, )`. Default: ``None``.
9226
+ input with data type of int64. The shape is :math:`(B, )`. Default: ``None``.
8389
9227
  pse_shift (Tensor, optional): The position encoding tensor with data type of float16 or bfloat16. Input tensor
8390
9228
  of shape :math:`(1, N, 1, S)` / :math:`(B, N, 1, S)`. Default: ``None``.
8391
9229
  dequant_scale1 (Tensor, optional): Quantization parameter, the tensor with data type of uint64 or float32. It
@@ -8400,22 +9238,25 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8400
9238
  The shape is :math:`(1,)`. Default: ``None``.
8401
9239
  antiquant_scale (Tensor, optional): Pseudo quantization parameter, the tensor with data type of float16 or
8402
9240
  bfloat16. The shape is :math:`(2, kvN, 1, D)` when input_layout is 'BNSD' or :math:`(2, kvH)` when
8403
- input_layout is 'BSH'. Default: ``None``.
9241
+ input_layout is 'BSH'. Default: ``None``.
8404
9242
  antiquant_offset (Tensor, optional): Pseudo quantization parameter, the tensor with data type of float16 or
8405
9243
  bfloat16. The shape is :math:`(2, kvN, 1, D)` when input_layout is 'BNSD' or :math:`(2, kvH)` when
8406
- input_layout is 'BSH'. Default: ``None``.
9244
+ input_layout is 'BSH'. Default: ``None``.
8407
9245
  block_table (Tensor, optional): The tensor with data type of int32. The shape is
8408
9246
  :math:`(B, max\_block\_num\_per\_seq)`,
8409
9247
  where :math:`max\_block\_num\_per\_seq = ceil(\frac{max(actual\_seq\_length)}{block\_size} )`.
8410
9248
  Default: ``None``.
8411
- num_heads (int): The number of heads.
8412
- input_layout (str): The data layout of the input qkv, support 'BSH' and 'BNSD'. Default ``'BSH'``.
8413
- scale_value (double): The scale value indicating the scale coefficient, which is used as the scalar of
8414
- Muls in the calculation. Default: ``1.0``.
8415
- num_key_value_heads (int): Head numbers of key/value which are used in GQA algorithm.
8416
- The value 0 indicates if the key and value have the same head nums, use numHeads. Default: ``0``.
8417
- block_size (int): The maximum number of tokens stored in each block of KV in page attention. Default: ``0``.
8418
- inner_precise (int): Default: ``1``.
9249
+ num_heads (int, optional): The number of heads. Default: ``1``.
9250
+ input_layout (str, optional): The data layout of the input qkv, supports 'BSH' and 'BNSD'. Default: ``'BSH'``.
9251
+ scale_value (double, optional): The scale value indicating the scale coefficient, which is used as
9252
+ the scalar of Muls in the calculation. Default: ``1.0``.
9253
+ num_key_value_heads (int, optional): Head numbers of `key`/`value` which are used in GQA algorithm.
9254
+ The value 0 indicates if the `key` and `value` have the same head nums, use `num_heads`. Default: ``0``.
9255
+ block_size (int, optional): The maximum number of tokens stored in each block of KV in page attention.
9256
+ Default: ``0``.
9257
+ inner_precise (int, optional): An int number from {0, 1} indicates computing mode.
9258
+ ``0`` for high precision mode for float16 dtype. ``1`` for high performance mode.
9259
+ Default: ``1``.
8419
9260
  kv_padding_size (Tensor, optional): The tensor with data type of int64. The range of values is
8420
9261
  :math:`0\le kv\_padding\_size \le S-max(actual\_seq\_length)`. The shape is :math:`()` or :math:`(1,)`.
8421
9262
  Default: ``None``.
@@ -8423,6 +9264,25 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8423
9264
  Returns:
8424
9265
  attention_out (Tensor), the shape is :math:`(B, 1, H)` / :math:`(B, N, 1, D)`.
8425
9266
 
9267
+ Raises:
9268
+ TypeError: dtype of `query` is not float16 or bfloat16.
9269
+ TypeError: `key` and `value` don't have the same dtype.
9270
+ TypeError: dtype of `attn_mask` is not bool, int8 or uint8.
9271
+ TypeError: dtype of `pse_shift` is not bfloat16 or float16.
9272
+ TypeError: `scale_value` is not a double number.
9273
+ TypeError: `input_layout` is not a string.
9274
+ TypeError: `num_key_value_heads` or `num_heads` is not an int.
9275
+ TypeError: `inner_precise` is not an int.
9276
+ TypeError: `quant_scale1` is not Tensor of type float32.
9277
+ TypeError: `quant_scale2` is not Tensor of type float32.
9278
+ TypeError: `quant_offset2` is not Tensor of type float32.
9279
+ ValueError: size of `actual_seq_lengths` is not 1 or B.
9280
+ ValueError: `input_layout` is a string but not `(BSH)` or `(BNSD)`.
9281
+ ValueError: `num_heads` is not divisible by Q_H.
9282
+ ValueError: `num_heads` is not divisible by `num_key_value_heads`.
9283
+ RuntimeError: `num_heads` is not greater than 0.
9284
+ RuntimeError: `attn_mask` shape is not valid.
9285
+
8426
9286
  Supported Platforms:
8427
9287
  ``Ascend``
8428
9288
 
@@ -8435,7 +9295,7 @@ def incre_flash_attention(query, key, value, attn_mask=None, actual_seq_lengths=
8435
9295
  >>> query = Tensor(np.random.randn(B, 1, N * D), mstype.float16)
8436
9296
  >>> key = [Tensor(np.random.randn(B, S, kvN * D), mstype.float16)]
8437
9297
  >>> value = [Tensor(np.random.randn(B, S, kvN * D), mstype.float16)]
8438
- >>> ifa_ms = ops.functional.incre_flash_attention
9298
+ >>> ifa_ms = ops.incre_flash_attention
8439
9299
  >>> attn_out = ifa_ms(query, key, value, num_heads=N, num_key_value_heads=kvN)
8440
9300
  >>> attn_out
8441
9301
  Tensor(shape=[1, 1, 512], dtype=Float16, value=
@@ -8458,7 +9318,7 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, sca
8458
9318
  Args:
8459
9319
  input (Tensor): The indices used to lookup in the `weight`. The data type must be mindspore.int32 or
8460
9320
  mindspore.int64, and the value should be in range `[0, weight.shape[0])`.
8461
- weight (Parameter): The matrix where to lookup from. The shape must be 2D.
9321
+ weight (Union[Parameter, Tensor]): The matrix where to lookup from. The shape must be 2D.
8462
9322
  padding_idx (int, optional): If the value is not None, the corresponding row of `weight` will not be updated
8463
9323
  in training. The value should be in range `[-weight.shape[0], weight.shape[0])` if it's not ``None``.
8464
9324
  Default ``None``.
@@ -8475,7 +9335,6 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, sca
8475
9335
  Raises:
8476
9336
  ValueError: If `padding_idx` is out of valid range.
8477
9337
  ValueError: If the shape of `weight` is invalid.
8478
- TypeError: `weight` is not a :class:`mindspore.Parameter`.
8479
9338
 
8480
9339
  Supported Platforms:
8481
9340
  ``Ascend``
@@ -8500,6 +9359,215 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, sca
8500
9359
  return embedding_op(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq)
8501
9360
 
8502
9361
 
9362
+ def speed_fusion_attention(query, key, value, head_num, input_layout, *, pse=None, padding_mask=None, atten_mask=None,
9363
+ scale=1.0, keep_prob=1.0, pre_tokens=2147483647, next_tokens=2147483647, inner_precise=0,
9364
+ prefix=None, actual_seq_qlen=None, actual_seq_kvlen=None, sparse_mode=0,
9365
+ gen_mask_parallel=True, sync=False, pse_type=1, q_start_idx=None, kv_start_idx=None):
9366
+ r"""
9367
+ The interface is used for self-attention fusion computing.
9368
+ If `pse_type` is ``1`` , calculation formula is:
9369
+
9370
+ .. math::
9371
+ attention\_out = Dropout(Softmax(Mask(scale * (pse + query * key^{T}), atten\_mask)), keep\_prob) * value
9372
+
9373
+ If `pse_type` is other valid value, calculation formula is:
9374
+
9375
+ .. math::
9376
+ attention\_out = Dropout(Softmax(Mask(scale * (query * key^{T}) + pse, atten\_mask)), keep\_prob) * value
9377
+
9378
+ - B: Batch size. Value range 1 to 2k.
9379
+ - S1: Sequence length of query. Value range 1 to 512k.
9380
+ - S2: Sequence length of key and value. Value range 1 to 512k.
9381
+ - N1: Num heads of query. Value range 1 to 256.
9382
+ - N2: Num heads of key and value, and N2 must be a factor of N1.
9383
+ - D: Head size. The value must be a multiple of 16, with a maximum of 512.
9384
+ - H1: Hidden size of query, which equals to N1 * D.
9385
+ - H2: Hidden size of key and value, which equals to N2 * D.
9386
+
9387
+ .. warning::
9388
+ - This is an experimental API that is subject to change or deletion.
9389
+ - Only supported on Atlas A2 training series.
9390
+
9391
+ Note:
9392
+ This interface is not supported in `graph mode (mode=mindspore.GRAPH_MODE)
9393
+ <https://www.mindspore.cn/tutorials/en/master/compile/static_graph.html>`_.
9394
+
9395
+ Args:
9396
+ query (Tensor): The query tensor. Input tensor of shape :math:`(B, S1, H1)`,
9397
+ :math:`(B, N1, S1, D)`, :math:`(S1, B, H1)`, :math:`(B, S1, N1, D)` or :math:`(T1, N1, D)`.
9398
+ key (Tensor): The key tensor. Input tensor of shape :math:`(B, S2, H2)`,
9399
+ :math:`(B, N2, S2, D)`, :math:`(S2, B, H2)`, :math:`(B, S2, N2, D)` or :math:`(T2, N2, D)`.
9400
+ value (Tensor): The value tensor. Input tensor of shape :math:`(B, S2, H2)`,
9401
+ :math:`(B, N2, S2, D)`, :math:`(S2, B, H2)`, :math:`(B, S2, N2, D)` or :math:`(T2, N2, D)`.
9402
+ The `key` and `value` should have the same shape.
9403
+ head_num (int): The head num of query, equal to N1.
9404
+ input_layout (str): Specifies the layout of input `query`, `key` and `value`. The value can be ``"BSH"`` ,
9405
+ ``"BNSD"`` , ``"SBH"`` , ``"BSND"`` or ``"TND"`` . ``"TND"`` is an experimental format.
9406
+ When `input_layout` is ``"TND"`` , the following restrictions must be met.
9407
+ There are two lists that represent the length of the input sequence: list_seq_q and list_seq_k. Each
9408
+ value in the list indicates the length of the sequence in the batch. For example, list_seq_q = [4, 2, 6],
9409
+ list_seq_k = [10, 3, 9]. Each element of a list indicates S. T1 is sum(list_seq_q) = 12, T2 is
9410
+ sum(list_seq_k) = 22.
9411
+ max_seqlen_q = max(list_seq_q), max_seqlen_k = max(list_seq_k).
9412
+ qk_pointer = sum(list_seq_q * list_seq_k), which is the sum of the element multiplication.
9413
+
9414
+ - The lengths of two lists are the same, and size of list is batch. batch is less than or equal to 1024.
9415
+ - When `input_layout` is ``"TND"`` , `actual_seq_qlen` and `actual_seq_kvlen` must be not ``None`` .
9416
+ Otherwise, they are ``None`` .
9417
+ - The `actual_seq_qlen` and `actual_seq_kvlen` are cumulative sums of the query and key/value sequence lengths, so they must
9418
+ be non-decreasing.
9419
+ - If `pse` is not ``None`` , list_seq_q and list_seq_k must be same. The maximum value of list_seq_q and
9420
+ list_seq_k is greater than 1024. `pse` should be :math:`(B, N1, 1024, S2)` or
9421
+ :math:`(1, N1, 1024, S2)`, and S2 is equal to max_seqlen_k.
9422
+ - `atten_mask` must be a lower triangular matrix, so `sparse_mode` should be 2 or 3. The shape of
9423
+ `atten_mask` should be :math:`(2048, 2048)`.
9424
+ - Prefix is ``None`` .
9425
+ - `next_tokens` is 0, and `pre_tokens` is not less than max_seqlen_q.
9426
+ - When `sparse_mode` is 3, S1 of each batch should be less than or equal to S2.
9427
+ - 0 should not exist in list_seq_k.
9428
+
9429
+ Keyword Args:
9430
+ pse (Tensor, optional): The position embedding code, dtype is same as `query`. Default: ``None`` .
9431
+ If S is greater than 1024 and the mask of the lower triangle is used, enter only the inverse 1024 lines of
9432
+ the lower triangle for memory optimization. Input tensor of shape :math:`(B, N1, S1, S2)`,
9433
+ :math:`(1, N1, S1, S2)`, :math:`(B, N1, 1024, S2)`, :math:`(1, N1, 1024, S2)`.
9434
+
9435
+ - ALiBi scenario: `pse` must meet the ALiBi rule, and `sparse_mode` is 2 or 3 for the lower triangle.
9436
+ In this scenario, `pse` is :math:`(B, N1, 1024, S2)`, :math:`(1, N1, 1024, S2)`.
9437
+ - Non-ALiBi scenario: `pse` is :math:`(B, N1, S1, S2)`, :math:`(1, N1, S1, S2)`.
9438
+ - The shape of `pse` should be :math:`(B, N1, 1024, S2)` and :math:`(1, N1, 1024, S2)` when `input_layout`
9439
+ is ``"TND"`` .
9440
+ - If `pse_type` is 2 or 3, dtype of `pse` must be float32, and shape of `pse` should be :math:`(B, N1)` or
9441
+ :math:`(N1,)`.
9442
+
9443
+ padding_mask (Tensor, optional): Reserved parameter. Not implemented yet. Default: ``None`` .
9444
+ atten_mask (Tensor, optional): The attention mask tensor. For each element, 0/False indicates retention and
9445
+ 1/True indicates discard. Input tensor of shape :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`,
9446
+ :math:`(S1, S2)` or :math:`(2048, 2048)`. Default: ``None`` .
9447
+
9448
+ - In compression scenario, `sparse_mode` is 2, 3, or 4, `atten_mask` must be :math:`(2048, 2048)`.
9449
+ - When `sparse_mode` is 5, `atten_mask` must be :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`.
9450
+ - When `sparse_mode` is 0 or 1, `atten_mask` should be :math:`(B, N1, S1, S2)`, :math:`(B, 1, S1, S2)`,
9451
+ :math:`(S1, S2)`.
9452
+
9453
+ scale (float, optional): The scale factor of score. Generally, the value is 1.0 / (D ** 0.5). Default: ``1.0`` .
9454
+ keep_prob (float, optional): The keep probability of dropout. Value range is (0.0, 1.0]. Default: ``1.0`` .
9455
+ pre_tokens (int, optional): Parameter for sparse computation, represents how many tokens are counted forward.
9456
+ When `sparse_mode` is set to 1, 2, 3, or 5, this parameter does not take effect. Default: ``2147483647`` .
9457
+ next_tokens (int, optional): Parameter for sparse computation, represents how many tokens are counted backward.
9458
+ When `sparse_mode` is set to 1, 2, 3, or 5, this parameter does not take effect. Default: ``2147483647`` .
9459
+ The value of pre_tokens corresponds to S1, and the value of next_tokens corresponds to S2. They define the
9460
+ valid area on the `atten_mask` matrix. It must ensure that the band is not empty.
9461
+ The following values are not allowed:
9462
+
9463
+ - pre_tokens < 0 and next_tokens < 0.
9464
+ - (pre_tokens < 0 and next_tokens >= 0) and (next_tokens < abs(pre_tokens) or abs(pre_tokens) >= S2).
9465
+ - (pre_tokens >= 0 and next_tokens < 0) and (abs(next_tokens) > pre_tokens or abs(next_tokens) >= S1).
9466
+
9467
+ inner_precise (int, optional): The parameter is reserved and not implemented yet. Default: ``0`` .
9468
+ prefix (Union[tuple[int], list[int]], optional): N value of each Batch in the prefix sparse calculation
9469
+ scenario. Input of shape :math:`(B,)`, with B at most 32. Not ``None`` only when `sparse_mode` is 5.
9470
+ If S1 > S2, N ranges from 0 to S2. If S1 <= S2, N ranges from S2 - S1 to S2. Default: ``None`` .
9471
+ actual_seq_qlen (Union[tuple[int], list[int]], optional): Size of query corresponding to each batch, array
9472
+ with increasing values and the last value equal to T1. Default: ``None`` .
9473
+ actual_seq_kvlen (Union[tuple[int], list[int]], optional): Size of key and value corresponding to each batch,
9474
+ array with increasing values and the last value equal to T2. Default: ``None`` .
9475
+ sparse_mode (int, optional): Indicates sparse mode. Default ``0`` .
9476
+
9477
+ - 0: Indicates the defaultMask mode. If `atten_mask` is not passed, the mask operation is not performed,
9478
+ and preTokens and nextTokens(internally assigned as INT_MAX) are ignored. If passed in, the full
9479
+ `atten_mask` matrix (S1 * S2) needs to be passed in, indicating that the part between preTokens and
9480
+ nextTokens needs to be calculated.
9481
+ - 1: Represents allMask, that is, passing in the complete `atten_mask` matrix.
9482
+ - 2: Representing the leftUpCausal mode corresponds to the lower triangle scenario divided by the left
9483
+ vertex, and the optimized `atten_mask` matrix (2048*2048) is required.
9484
+ - 3: Representing the rightDownCausal mode corresponds to the lower triangle scenario divided by the lower
9485
+ right vertex, and the optimized `atten_mask` matrix (2048*2048) is required.
9486
+ - 4: Represents the band scenario, that is, the part between counting preTokens and nextTokens, and the
9487
+ optimized `atten_mask` matrix (2048*2048) is required.
9488
+ - 5: Represents the prefix scenario, that is, on the basis of rightDownCausal, a matrix with length S1 and
9489
+ width N is added to the left side. The value of N is obtained by the new input prefix, and the N value
9490
+ of each Batch axis is different. Currently not enabled.
9491
+ - 6: Represents the global scenario. Currently not enabled.
9492
+ - 7: Represents the dilated scenario. Currently not enabled.
9493
+ - 8: Represents the block_local scenario. Currently not enabled.
9494
+
9495
+ gen_mask_parallel (bool, optional): Debug parameter, a switch to control dropout_gen_mask execution method.
9496
+ If ``True`` , dropout_gen_mask is executed in parallel. If ``False`` , execution is serial.
9497
+ Not implemented yet. Default: ``True`` .
9498
+ sync (bool, optional): Debug parameter, a switch to control dropout_gen_mask execution method.
9499
+ If ``True`` , dropout_gen_mask is executed synchronously. If ``False`` , execution is asynchronous.
9500
+ Not implemented yet. Default: ``False`` .
9501
+ pse_type (int, optional): Indicates how to use `pse`. Default ``1`` .
9502
+
9503
+ - 0: `pse` is passed from outside, and the calculation process is to first mul `scale` and then add `pse`.
9504
+ - 1: `pse` is passed from outside, and the calculation process is to add `pse` first and then mul `scale`.
9505
+ - 2: `pse` is generated internally and generates standard alibi position information. The internally
9506
+ generated alibi matrix 0 line is aligned with the upper left corner of :math:`query * key^{T}`.
9507
+ - 3: `pse` is generated internally, and the generated alibi position information is based on the standard
9508
+ one, with its square root then taken. The internally generated alibi matrix 0 line is aligned with
9509
+ the upper left corner of :math:`query * key^{T}`.
9510
+
9511
+ q_start_idx (Union[tuple[int], list[int]], optional): Int array with length 1. Default: ``None`` .
9512
+ When pse_type is configured as ``2`` or ``3`` , it indicates the number of cells that the internally
9513
+ generated alibi code is offset in the S1 direction. A positive number indicates that 0 moves diagonally
9514
+ upward.
9515
+ kv_start_idx (Union[tuple[int], list[int]], optional): Int array with length 1. Default: ``None`` .
9516
+ When pse_type is configured as ``2`` or ``3`` , it indicates the number of cells that the internally
9517
+ generated alibi code is offset in the S2 direction. A positive number indicates that 0 moves diagonally
9518
+ upward.
9519
+
9520
+ Returns:
9521
+ A tuple of tensors containing `attention_out`, `softmax_max`, `softmax_sum`, `softmax_out`, `seed`, `offset`
9522
+ and `numels` .
9523
+
9524
+ - `attention_out` is the output of attention; its shape and data type are the same as those of `query`.
9525
+ - `softmax_max` is the max intermediate result calculated by Softmax, used for grad calculation.
9526
+ - `softmax_sum` is the sum intermediate result calculated by Softmax, used for grad calculation.
9527
+ - `softmax_out` is a reserved parameter.
9528
+ - `seed` is generated seed, used for Dropout.
9529
+ - `offset` is generated offset, used for Dropout.
9530
+ - `numels` is the length of generated dropout_mask.
9531
+
9532
+ Raises:
9533
+ TypeError: `query`, `key` and `value` don't have the same dtype.
9534
+ TypeError: Dtype of `atten_mask` is not bool or uint8.
9535
+ TypeError: `scale` or `keep_prob` is not a float number.
9536
+ TypeError: `input_layout` is not a string.
9537
+ TypeError: `head_num` is not an int.
9538
+ TypeError: `sparse_mode` is not an int.
9539
+ TypeError: `pse` is not Tensor type.
9540
+ TypeError: `padding_mask` is not Tensor type.
9541
+ TypeError: `atten_mask` is not Tensor type.
9542
+ TypeError: `pse_type` is not an int.
9543
+ ValueError: `input_layout` is a string but not valid.
9544
+ ValueError: The specified value of `sparse_mode` is invalid.
9545
+ ValueError: The specified value of `pse_type` is invalid.
9546
+
9547
+ Supported Platforms:
9548
+ ``Ascend``
9549
+
9550
+ Examples:
9551
+ >>> import mindspore
9552
+ >>> import mindspore.common.dtype as mstype
9553
+ >>> import numpy as np
9554
+ >>> from mindspore import ops, Tensor
9555
+ >>> query = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
9556
+ >>> key = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
9557
+ >>> value = Tensor(np.ones([2, 4, 64]), dtype=mstype.float16)
9558
+ >>> head_num = 4
9559
+ >>> input_layout = "BSH"
9560
+ >>> output = ops.speed_fusion_attention(query, key, value, head_num, input_layout)
9561
+ >>> print(output[0].shape)
9562
+ (2, 4, 64)
9563
+ """
9564
+ seed, offset = default_generator._step(generator_step_) # pylint: disable=protected-access
9565
+ return speed_fusion_attention_op(query, key, value, head_num, input_layout, seed, offset, pse, padding_mask,
9566
+ atten_mask, scale, keep_prob, pre_tokens, next_tokens, inner_precise, prefix,
9567
+ actual_seq_qlen, actual_seq_kvlen, sparse_mode, gen_mask_parallel, sync, pse_type,
9568
+ q_start_idx, kv_start_idx)
9569
+
9570
+
8503
9571
  __all__ = [
8504
9572
  'adaptive_avg_pool1d',
8505
9573
  'adaptive_avg_pool2d',
@@ -8530,11 +9598,15 @@ __all__ = [
8530
9598
  'fast_gelu',
8531
9599
  'fractional_max_pool2d',
8532
9600
  'fractional_max_pool3d',
9601
+ 'speed_fusion_attention',
8533
9602
  'pixel_shuffle',
8534
9603
  'pixel_unshuffle',
8535
9604
  'hardshrink',
8536
9605
  'is_floating_point',
8537
9606
  'incre_flash_attention',
9607
+ 'prompt_flash_attention',
9608
+ 'flash_attention_score',
9609
+ 'fused_infer_attention_score',
8538
9610
  'flip',
8539
9611
  'fliplr',
8540
9612
  'flipud',
@@ -8555,7 +9627,6 @@ __all__ = [
8555
9627
  'softplus',
8556
9628
  'selu',
8557
9629
  'silu',
8558
- 'soft_margin_loss',
8559
9630
  'softmax',
8560
9631
  'softmin',
8561
9632
  'pdist',
@@ -8577,6 +9648,7 @@ __all__ = [
8577
9648
  'conv2d',
8578
9649
  'conv_transpose2d',
8579
9650
  'sigmoid',
9651
+ 'soft_margin_loss',
8580
9652
  'logsigmoid',
8581
9653
  'relu',
8582
9654
  'relu6',
@@ -8594,6 +9666,8 @@ __all__ = [
8594
9666
  'gaussian_nll_loss',
8595
9667
  'lp_pool1d',
8596
9668
  'lp_pool2d',
9669
+ 'moe_token_permute',
9670
+ 'moe_token_unpermute',
8597
9671
  'max_unpool1d',
8598
9672
  'max_unpool2d',
8599
9673
  'max_unpool3d',
@@ -8605,5 +9679,6 @@ __all__ = [
8605
9679
  'add_layer_norm',
8606
9680
  'group_norm',
8607
9681
  'rms_norm',
9682
+ 'add_rms_norm',
8608
9683
  ]
8609
9684
  __all__.sort()