mindspore 2.1.0__cp37-none-any.whl → 2.2.11__cp37-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic.

Files changed (577)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_akg/akg/build_module.py +5 -6
  4. mindspore/_akg/akg/composite/build_module.py +139 -22
  5. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  6. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  7. mindspore/_akg/akg/tvm/api.py +4 -3
  8. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  9. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  10. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  11. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  12. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  13. mindspore/_akg/akg/tvm/build_module.py +16 -1
  14. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  15. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  16. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  17. mindspore/_akg/akg/tvm/module.py +1 -2
  18. mindspore/_akg/akg/tvm/stmt.py +2 -2
  19. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  20. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  21. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  22. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  23. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  24. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  25. mindspore/_akg/akg/utils/composite_op_helper.py +16 -12
  26. mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
  27. mindspore/_akg/akg/utils/kernel_exec.py +98 -274
  28. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  29. mindspore/_akg/akg/utils/tbe_codegen_utils.py +219 -0
  30. mindspore/_akg/akg/utils/util.py +56 -1
  31. mindspore/_c_dataengine.cpython-37m-aarch64-linux-gnu.so +0 -0
  32. mindspore/_c_expression.cpython-37m-aarch64-linux-gnu.so +0 -0
  33. mindspore/_c_mindrecord.cpython-37m-aarch64-linux-gnu.so +0 -0
  34. mindspore/_check_jit_forbidden_api.py +3 -1
  35. mindspore/_checkparam.py +23 -29
  36. mindspore/_extends/graph_kernel/__init__.py +0 -1
  37. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  38. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  39. mindspore/_extends/graph_kernel/splitter.py +4 -11
  40. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
  41. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
  42. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  43. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  44. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
  45. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
  46. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  47. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  48. mindspore/_extends/parse/__init__.py +13 -15
  49. mindspore/_extends/parse/namespace.py +7 -33
  50. mindspore/_extends/parse/parser.py +67 -72
  51. mindspore/_extends/parse/resources.py +1 -1
  52. mindspore/_extends/parse/standard_method.py +86 -106
  53. mindspore/_extends/parse/trope.py +1 -1
  54. mindspore/_extends/remote/kernel_build_server.py +25 -7
  55. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  56. mindspore/_install_custom.py +43 -0
  57. mindspore/_mindspore_offline_debug.cpython-37m-aarch64-linux-gnu.so +0 -0
  58. mindspore/amp.py +47 -11
  59. mindspore/bin/cache_admin +0 -0
  60. mindspore/bin/cache_server +0 -0
  61. mindspore/boost/boost.py +1 -8
  62. mindspore/boost/boost_cell_wrapper.py +3 -2
  63. mindspore/boost/grad_accumulation.py +1 -1
  64. mindspore/boost/group_loss_scale_manager.py +8 -7
  65. mindspore/common/__init__.py +5 -3
  66. mindspore/common/_jit_fallback_utils.py +6 -0
  67. mindspore/common/_register_for_adapter.py +2 -0
  68. mindspore/common/_register_for_tensor.py +2 -2
  69. mindspore/common/_stub_tensor.py +13 -0
  70. mindspore/common/_utils.py +29 -0
  71. mindspore/common/api.py +174 -259
  72. mindspore/common/auto_dynamic_shape.py +494 -0
  73. mindspore/common/dtype.py +18 -11
  74. mindspore/common/dump.py +6 -4
  75. mindspore/common/initializer.py +14 -14
  76. mindspore/common/jit_config.py +33 -15
  77. mindspore/common/lazy_inline.py +126 -7
  78. mindspore/common/mindir_util.py +101 -0
  79. mindspore/common/parameter.py +51 -41
  80. mindspore/common/seed.py +4 -4
  81. mindspore/common/sparse_tensor.py +13 -14
  82. mindspore/common/tensor.py +243 -165
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +83 -4
  85. mindspore/communication/management.py +152 -84
  86. mindspore/config/op_info.config +14 -3
  87. mindspore/config/super_bar_config.json +4 -2
  88. mindspore/context.py +152 -61
  89. mindspore/dataset/__init__.py +5 -5
  90. mindspore/dataset/audio/__init__.py +2 -2
  91. mindspore/dataset/audio/transforms.py +52 -52
  92. mindspore/dataset/callback/ds_callback.py +16 -2
  93. mindspore/dataset/core/config.py +68 -51
  94. mindspore/dataset/engine/cache_client.py +33 -7
  95. mindspore/dataset/engine/datasets.py +250 -112
  96. mindspore/dataset/engine/datasets_audio.py +43 -211
  97. mindspore/dataset/engine/datasets_standard_format.py +16 -35
  98. mindspore/dataset/engine/datasets_text.py +43 -67
  99. mindspore/dataset/engine/datasets_user_defined.py +86 -100
  100. mindspore/dataset/engine/datasets_vision.py +219 -1029
  101. mindspore/dataset/engine/iterators.py +11 -4
  102. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
  103. mindspore/dataset/engine/obs/util.py +3 -0
  104. mindspore/dataset/engine/samplers.py +1 -1
  105. mindspore/dataset/engine/validators.py +19 -5
  106. mindspore/dataset/text/__init__.py +3 -3
  107. mindspore/dataset/text/transforms.py +101 -127
  108. mindspore/dataset/text/utils.py +205 -138
  109. mindspore/dataset/transforms/__init__.py +1 -1
  110. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  111. mindspore/dataset/transforms/transforms.py +95 -40
  112. mindspore/dataset/utils/browse_dataset.py +8 -2
  113. mindspore/dataset/utils/line_reader.py +17 -19
  114. mindspore/dataset/vision/__init__.py +3 -3
  115. mindspore/dataset/vision/c_transforms.py +6 -3
  116. mindspore/dataset/vision/transforms.py +409 -287
  117. mindspore/dataset/vision/utils.py +13 -14
  118. mindspore/dataset/vision/validators.py +11 -1
  119. mindspore/experimental/map_parameter.py +14 -0
  120. mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
  121. mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
  122. mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
  123. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  124. mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
  125. mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
  126. mindspore/gen_ops.py +273 -0
  127. mindspore/include/OWNERS +0 -1
  128. mindspore/include/api/data_type.h +2 -1
  129. mindspore/include/api/graph.h +0 -15
  130. mindspore/include/api/kernel.h +2 -0
  131. mindspore/include/api/kernel_api.h +37 -12
  132. mindspore/include/api/model.h +17 -14
  133. mindspore/include/api/status.h +8 -3
  134. mindspore/include/api/types.h +37 -4
  135. mindspore/include/c_api/ms/abstract.h +67 -0
  136. mindspore/include/c_api/ms/attribute.h +197 -0
  137. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  138. mindspore/include/c_api/ms/base/macros.h +32 -0
  139. mindspore/include/c_api/ms/base/status.h +33 -0
  140. mindspore/include/c_api/ms/base/types.h +282 -0
  141. mindspore/include/c_api/ms/context.h +102 -0
  142. mindspore/include/c_api/ms/graph.h +160 -0
  143. mindspore/include/c_api/ms/node.h +606 -0
  144. mindspore/include/c_api/ms/tensor.h +161 -0
  145. mindspore/include/c_api/ms/value.h +84 -0
  146. mindspore/include/dataset/constants.h +6 -5
  147. mindspore/include/dataset/execute.h +23 -13
  148. mindspore/include/dataset/text.h +26 -26
  149. mindspore/include/dataset/transforms.h +13 -13
  150. mindspore/include/dataset/vision.h +60 -60
  151. mindspore/include/dataset/vision_ascend.h +5 -6
  152. mindspore/include/dataset/vision_lite.h +17 -17
  153. mindspore/include/mindapi/base/type_id.h +1 -0
  154. mindspore/include/mindapi/base/types.h +1 -0
  155. mindspore/lib/libdnnl.so.2 +0 -0
  156. mindspore/lib/libjemalloc.so.2 +0 -0
  157. mindspore/lib/libmindspore.so +0 -0
  158. mindspore/lib/libmindspore_backend.so +0 -0
  159. mindspore/lib/libmindspore_common.so +0 -0
  160. mindspore/lib/libmindspore_core.so +0 -0
  161. mindspore/lib/libmindspore_glog.so.0 +0 -0
  162. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  163. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  164. mindspore/lib/libmindspore_shared_lib.so +0 -0
  165. mindspore/lib/libnnacl.so +0 -0
  166. mindspore/lib/libopencv_core.so.4.5 +0 -0
  167. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  168. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  169. mindspore/lib/libps_cache.so +0 -0
  170. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  171. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  172. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  173. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  174. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  175. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  176. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  177. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  178. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  179. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  180. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  181. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  182. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  183. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  184. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  185. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8998 -0
  186. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  187. mindspore/lib/plugin/ascend/libakg.so +0 -0
  188. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  189. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  190. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  191. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  192. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  193. mindspore/lib/plugin/cpu/libakg.so +0 -0
  194. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  195. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  196. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  197. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  198. mindspore/nn/__init__.py +0 -2
  199. mindspore/nn/cell.py +313 -74
  200. mindspore/nn/dynamic_lr.py +21 -21
  201. mindspore/nn/layer/activation.py +22 -30
  202. mindspore/nn/layer/basic.py +15 -13
  203. mindspore/nn/layer/channel_shuffle.py +1 -1
  204. mindspore/nn/layer/container.py +271 -9
  205. mindspore/nn/layer/conv.py +323 -204
  206. mindspore/nn/layer/dense.py +8 -5
  207. mindspore/nn/layer/embedding.py +33 -27
  208. mindspore/nn/layer/flash_attention.py +61 -95
  209. mindspore/nn/layer/image.py +8 -6
  210. mindspore/nn/layer/math.py +16 -25
  211. mindspore/nn/layer/normalization.py +107 -66
  212. mindspore/nn/layer/padding.py +1 -1
  213. mindspore/nn/layer/pooling.py +131 -109
  214. mindspore/nn/layer/rnn_cells.py +27 -22
  215. mindspore/nn/layer/rnns.py +13 -16
  216. mindspore/nn/layer/thor_layer.py +1 -1
  217. mindspore/nn/layer/transformer.py +221 -154
  218. mindspore/nn/learning_rate_schedule.py +9 -1
  219. mindspore/nn/loss/loss.py +235 -174
  220. mindspore/nn/optim/ada_grad.py +2 -1
  221. mindspore/nn/optim/adadelta.py +1 -0
  222. mindspore/nn/optim/adafactor.py +2 -1
  223. mindspore/nn/optim/adam.py +7 -4
  224. mindspore/nn/optim/adamax.py +3 -2
  225. mindspore/nn/optim/adasum.py +2 -2
  226. mindspore/nn/optim/asgd.py +2 -3
  227. mindspore/nn/optim/ftrl.py +6 -5
  228. mindspore/nn/optim/lamb.py +7 -4
  229. mindspore/nn/optim/lars.py +1 -1
  230. mindspore/nn/optim/lazyadam.py +5 -3
  231. mindspore/nn/optim/momentum.py +2 -1
  232. mindspore/nn/optim/optimizer.py +53 -4
  233. mindspore/nn/optim/proximal_ada_grad.py +3 -4
  234. mindspore/nn/optim/rmsprop.py +4 -3
  235. mindspore/nn/optim/rprop.py +23 -12
  236. mindspore/nn/optim/sgd.py +26 -11
  237. mindspore/nn/optim/thor.py +9 -7
  238. mindspore/nn/probability/bijector/bijector.py +5 -5
  239. mindspore/nn/probability/bijector/power_transform.py +27 -27
  240. mindspore/nn/probability/bijector/softplus.py +3 -3
  241. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
  242. mindspore/nn/probability/distribution/bernoulli.py +5 -5
  243. mindspore/nn/probability/distribution/beta.py +3 -3
  244. mindspore/nn/probability/distribution/categorical.py +7 -7
  245. mindspore/nn/probability/distribution/cauchy.py +0 -1
  246. mindspore/nn/probability/distribution/distribution.py +3 -3
  247. mindspore/nn/probability/distribution/gamma.py +3 -3
  248. mindspore/nn/probability/distribution/geometric.py +4 -4
  249. mindspore/nn/probability/distribution/gumbel.py +4 -4
  250. mindspore/nn/probability/distribution/log_normal.py +2 -2
  251. mindspore/nn/probability/distribution/logistic.py +2 -2
  252. mindspore/nn/probability/distribution/poisson.py +4 -4
  253. mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
  254. mindspore/nn/probability/distribution/uniform.py +6 -6
  255. mindspore/nn/wrap/__init__.py +4 -2
  256. mindspore/nn/wrap/cell_wrapper.py +87 -34
  257. mindspore/nn/wrap/grad_reducer.py +8 -5
  258. mindspore/nn/wrap/loss_scale.py +105 -42
  259. mindspore/numpy/array_creations.py +1 -2
  260. mindspore/numpy/array_ops.py +3 -2
  261. mindspore/numpy/utils_const.py +5 -5
  262. mindspore/offline_debug/convert_async.py +2 -2
  263. mindspore/ops/_grad_experimental/__init__.py +0 -5
  264. mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
  265. mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
  266. mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
  267. mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
  268. mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
  269. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
  270. mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
  271. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  272. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  273. mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
  274. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  275. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  276. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  277. mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
  278. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  279. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  280. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  281. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  282. mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
  283. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  284. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  285. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  286. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  287. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  288. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  289. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  290. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
  291. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
  292. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  293. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  294. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  295. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  296. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  297. mindspore/ops/_op_impl/tbe/__init__.py +4 -4
  298. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  299. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  300. mindspore/ops/_primitive_cache.py +1 -1
  301. mindspore/ops/_tracefunc.py +45 -13
  302. mindspore/ops/_utils/utils.py +6 -1
  303. mindspore/ops/_vmap/vmap_array_ops.py +3 -3
  304. mindspore/ops/_vmap/vmap_base.py +3 -3
  305. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  306. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  307. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  308. mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
  309. mindspore/ops/arg_dtype_cast.py +54 -0
  310. mindspore/ops/composite/base.py +37 -10
  311. mindspore/ops/composite/math_ops.py +5 -4
  312. mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
  313. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
  314. mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
  315. mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
  316. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  317. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  318. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  319. mindspore/ops/deprecated.py +304 -0
  320. mindspore/ops/function/__init__.py +4 -1
  321. mindspore/ops/function/array_func.py +174 -193
  322. mindspore/ops/function/clip_func.py +81 -13
  323. mindspore/ops/function/debug_func.py +1 -1
  324. mindspore/ops/function/grad/grad_func.py +18 -9
  325. mindspore/ops/function/image_func.py +10 -4
  326. mindspore/ops/function/linalg_func.py +5 -5
  327. mindspore/ops/function/math_func.py +575 -386
  328. mindspore/ops/function/nn_func.py +568 -260
  329. mindspore/ops/function/random_func.py +88 -57
  330. mindspore/ops/function/sparse_func.py +1 -1
  331. mindspore/ops/function/sparse_unary_func.py +14 -12
  332. mindspore/ops/function/vmap_func.py +6 -5
  333. mindspore/ops/functional.py +15 -10
  334. mindspore/ops/op_info_register.py +244 -25
  335. mindspore/ops/operations/__init__.py +31 -19
  336. mindspore/ops/operations/_grad_ops.py +71 -7
  337. mindspore/ops/operations/_inner_ops.py +350 -17
  338. mindspore/ops/operations/_quant_ops.py +4 -8
  339. mindspore/ops/operations/_sequence_ops.py +42 -0
  340. mindspore/ops/operations/array_ops.py +68 -282
  341. mindspore/ops/operations/comm_ops.py +107 -59
  342. mindspore/ops/operations/custom_ops.py +94 -70
  343. mindspore/ops/operations/debug_ops.py +8 -4
  344. mindspore/ops/operations/image_ops.py +18 -12
  345. mindspore/ops/operations/inner_ops.py +26 -3
  346. mindspore/ops/operations/math_ops.py +192 -144
  347. mindspore/ops/operations/nn_ops.py +857 -489
  348. mindspore/ops/operations/other_ops.py +0 -22
  349. mindspore/ops/operations/random_ops.py +53 -111
  350. mindspore/ops/operations/sparse_ops.py +3 -1
  351. mindspore/ops/primitive.py +24 -18
  352. mindspore/parallel/_auto_parallel_context.py +68 -8
  353. mindspore/parallel/_cost_model_context.py +2 -2
  354. mindspore/parallel/_offload_context.py +17 -3
  355. mindspore/parallel/_parallel_serialization.py +12 -5
  356. mindspore/parallel/_ps_context.py +12 -0
  357. mindspore/parallel/_tensor.py +18 -13
  358. mindspore/parallel/_transformer/layers.py +5 -3
  359. mindspore/parallel/_transformer/loss.py +1 -0
  360. mindspore/parallel/_transformer/moe.py +2 -2
  361. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  362. mindspore/parallel/_transformer/transformer.py +23 -3
  363. mindspore/parallel/_utils.py +11 -7
  364. mindspore/parallel/algo_parameter_config.py +85 -5
  365. mindspore/parallel/checkpoint_transform.py +19 -12
  366. mindspore/parallel/shard.py +21 -14
  367. mindspore/profiler/common/struct_type.py +3 -3
  368. mindspore/profiler/common/util.py +4 -2
  369. mindspore/profiler/envprofiling.py +1 -1
  370. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  371. mindspore/profiler/parser/ascend_flops_generator.py +2 -2
  372. mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
  373. mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
  374. mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
  375. mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
  376. mindspore/profiler/parser/ascend_op_generator.py +6 -6
  377. mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
  378. mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
  379. mindspore/profiler/parser/base_timeline_generator.py +10 -8
  380. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
  381. mindspore/profiler/parser/flops_parser.py +15 -11
  382. mindspore/profiler/parser/framework_parser.py +38 -22
  383. mindspore/profiler/parser/hccl_parser.py +16 -12
  384. mindspore/profiler/parser/integrator.py +22 -11
  385. mindspore/profiler/parser/memory_usage_parser.py +2 -2
  386. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  387. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  388. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  389. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  390. mindspore/profiler/parser/optime_parser.py +1 -1
  391. mindspore/profiler/parser/profiler_info.py +21 -2
  392. mindspore/profiler/parser/step_trace_parser.py +11 -14
  393. mindspore/profiler/profiling.py +179 -89
  394. mindspore/rewrite/api/node.py +102 -19
  395. mindspore/rewrite/api/node_type.py +5 -1
  396. mindspore/rewrite/api/pattern_engine.py +1 -1
  397. mindspore/rewrite/api/scoped_value.py +9 -17
  398. mindspore/rewrite/api/symbol_tree.py +131 -47
  399. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  400. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  401. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  402. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
  403. mindspore/rewrite/common/rewrite_elog.py +5 -1
  404. mindspore/rewrite/namer.py +33 -24
  405. mindspore/rewrite/namespace.py +14 -5
  406. mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
  407. mindspore/rewrite/node/call_function.py +79 -0
  408. mindspore/rewrite/node/cell_container.py +135 -0
  409. mindspore/rewrite/node/control_flow.py +88 -0
  410. mindspore/rewrite/{node.py → node/node.py} +273 -234
  411. mindspore/rewrite/node/node_manager.py +254 -0
  412. mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
  413. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  414. mindspore/rewrite/parsers/assign_parser.py +216 -221
  415. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  416. mindspore/rewrite/parsers/class_def_parser.py +174 -113
  417. mindspore/rewrite/parsers/constant_parser.py +9 -6
  418. mindspore/rewrite/parsers/container_parser.py +9 -7
  419. mindspore/rewrite/parsers/for_parser.py +42 -21
  420. mindspore/rewrite/parsers/function_def_parser.py +24 -16
  421. mindspore/rewrite/parsers/if_parser.py +28 -24
  422. mindspore/rewrite/parsers/module_parser.py +196 -25
  423. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  424. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  425. mindspore/rewrite/parsers/return_parser.py +6 -6
  426. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  427. mindspore/rewrite/sparsify/utils.py +1 -1
  428. mindspore/rewrite/symbol_tree.py +523 -578
  429. mindspore/rewrite/symbol_tree_builder.py +9 -193
  430. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  431. mindspore/run_check/_check_version.py +6 -4
  432. mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
  433. mindspore/safeguard/rewrite_obfuscation.py +541 -0
  434. mindspore/scipy/linalg.py +1 -1
  435. mindspore/scipy/ops.py +55 -5
  436. mindspore/scipy/optimize/__init__.py +3 -2
  437. mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
  438. mindspore/scipy/optimize/minimize.py +7 -3
  439. mindspore/train/_utils.py +7 -3
  440. mindspore/train/amp.py +323 -123
  441. mindspore/train/anf_ir_pb2.py +14 -2
  442. mindspore/train/callback/_backup_and_restore.py +2 -12
  443. mindspore/train/callback/_callback.py +29 -4
  444. mindspore/train/callback/_checkpoint.py +23 -8
  445. mindspore/train/callback/_early_stop.py +2 -2
  446. mindspore/train/callback/_landscape.py +4 -4
  447. mindspore/train/callback/_loss_monitor.py +2 -2
  448. mindspore/train/callback/_on_request_exit.py +2 -2
  449. mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
  450. mindspore/train/callback/_summary_collector.py +15 -8
  451. mindspore/train/callback/_time_monitor.py +58 -5
  452. mindspore/train/data_sink.py +5 -11
  453. mindspore/train/dataset_helper.py +84 -57
  454. mindspore/train/loss_scale_manager.py +2 -2
  455. mindspore/train/metrics/__init__.py +3 -3
  456. mindspore/train/metrics/cosine_similarity.py +1 -1
  457. mindspore/train/metrics/hausdorff_distance.py +3 -2
  458. mindspore/train/metrics/mean_surface_distance.py +3 -2
  459. mindspore/train/metrics/metric.py +39 -19
  460. mindspore/train/metrics/roc.py +2 -2
  461. mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
  462. mindspore/train/mind_ir_pb2.py +85 -36
  463. mindspore/train/model.py +187 -47
  464. mindspore/train/serialization.py +487 -161
  465. mindspore/train/summary/_summary_adapter.py +1 -1
  466. mindspore/train/summary/_writer_pool.py +3 -2
  467. mindspore/train/summary/summary_record.py +37 -17
  468. mindspore/train/train_thor/convert_utils.py +3 -3
  469. mindspore/train/train_thor/dataset_helper.py +1 -1
  470. mindspore/version.py +1 -1
  471. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +8 -8
  472. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +476 -527
  473. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -1
  474. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  475. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  476. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  477. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  478. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  479. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  480. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  481. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  482. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  483. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  484. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  485. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  486. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  487. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  488. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  489. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  490. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  491. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  492. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  493. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  494. mindspore/_extends/graph_kernel/expander.py +0 -80
  495. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
  496. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  497. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  498. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  499. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  500. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  501. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  502. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  503. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  504. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  505. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  506. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  507. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  508. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  509. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  510. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  511. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  512. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  513. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  514. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  515. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  516. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  517. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  518. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  519. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  520. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  521. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  522. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  523. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  524. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  525. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  526. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  527. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  528. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  529. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  530. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  531. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  532. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  533. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  534. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  535. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  536. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  537. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  538. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  539. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  540. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  541. mindspore/dataset/datapreprocess/__init__.py +0 -20
  542. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  543. mindspore/include/api/net.h +0 -142
  544. mindspore/nn/lr_scheduler.py +0 -262
  545. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
  546. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
  547. mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
  548. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  549. mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
  550. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
  551. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
  552. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
  553. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
  554. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
  555. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  556. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  557. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  558. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  559. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
  560. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
  561. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
  562. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  563. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
  565. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
  566. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  567. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  568. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
  569. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
  570. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
  571. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
  572. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
  573. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  574. mindspore/rewrite/node_visitor.py +0 -44
  575. /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
  576. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
  577. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
@@ -27,7 +27,7 @@ from mindspore.ops.operations import nn_ops as NN_OPS
  from mindspore.ops.operations import _sequence_ops as seq
  import mindspore.common.dtype as mstype
  from mindspore.ops.function.math_func import logsumexp
- from mindspore.ops.function.random_func import _get_seed
+ from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
  from mindspore.common.tensor import Tensor
  from mindspore._c_expression import Tensor as Tensor_
  from mindspore.ops._primitive_cache import _get_cache_prim
@@ -40,6 +40,7 @@ from mindspore.ops.operations.nn_ops import ChannelShuffle
  from mindspore.ops.operations.nn_ops import TripletMarginLoss
  from mindspore.ops.operations._inner_ops import SiLU
  from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
+ from mindspore.common.api import _function_forbid_reuse

  slice_ = P.Slice()
  fast_gelu_ = P.FastGeLU()
@@ -232,7 +233,7 @@ def adaptive_avg_pool3d(input, output_size):
  def _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad):
  """Checks the type of avgpool1d input"""
  validator.check_value_type('kernel_size', kernel_size, [int], 'avg_pool1d')
- validator.check_value_type('stride', stride, [int], 'avg_pool1d')
+ validator.check_value_type('stride', stride, (int, tuple), 'avg_pool1d')
  validator.check_value_type('ceil_mode', ceil_mode, bool, 'avg_pool1d')
  validator.check_value_type('count_include_pad', count_include_pad, bool, 'avg_pool1d')
  validator.check_int(kernel_size, 1, validator.GE, "kernel_size", 'avg_pool1d')
@@ -263,12 +264,10 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  Args:
  input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
  kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
- stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents the height and
- width of movement are both strides, or a tuple of two int numbers that represent height and width of
- movement respectively. Default: ``1`` .
- padding (Union(int, tuple[int])): The pad value to be filled. If `padding` is an integer, the paddings of left
- and right are the same, equal to pad. If `padding` is a tuple of `2` integers, the padding of left and right
- equal to `padding[0]` and `padding[1]` correspondingly. Default: ``0`` .
+ stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
+ number or a tuple of one int number. Default: ``1`` .
+ padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
+ or a tuple of one integer. Default: ``0`` .
  ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
  count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .

@@ -300,20 +299,25 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  raise TypeError("For avg_pool1d, the input input_x must be tensor")

  if len(input_x.shape) != 3:
- raise ValueError("For avg_pool1d, input must have 3 dim, but got {}.".format(len(input_x.shape)))
+ raise ValueError(f"For avg_pool1d, input must have 3 dim, but got {len(input_x.shape)}.")

  _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad)
  if isinstance(padding, int):
  check_non_negative_int(padding, 'padding', 'avg_pool1d')
  padding = (0, 0, 0, 0, padding, padding)
  elif isinstance(padding, tuple):
- if len(padding) != 2:
- raise ValueError("For avg_pool1d, padding should be int or tuple of length 2.")
+ if len(padding) != 1:
+ raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
  for item in padding:
  check_non_negative_int(item, 'padding', 'avg_pool1d')
- padding = (0, 0, 0, 0, padding[0], padding[1])
+ padding = (0, 0, 0, 0, padding[0], padding[0])
  else:
- raise TypeError("For avg_pool1d, padding should be int or tuple of length 2.")
+ raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")
+
+ if isinstance(stride, tuple):
+ if len(stride) != 1:
+ raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
+ stride = stride[0]

  expand_op = _get_cache_prim(P.ExpandDims)()
  squeeze_op = _get_cache_prim(P.Squeeze)((2, 3))
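The avg_pool1d hunks above relax the `stride` check to accept an int or a tuple and restrict tuple `padding` to a single element. A minimal usage sketch against the 2.2.11 signature shown in the hunk above (input values are illustrative only, not taken from the package):

import numpy as np
import mindspore
from mindspore import Tensor, ops

# (N, C_in, L_in) input; stride and padding may now be one-element tuples as well as ints
x = Tensor(np.random.randn(1, 3, 8), mindspore.float32)
out = ops.avg_pool1d(x, kernel_size=2, stride=(2,), padding=(1,))
print(out.shape)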
@@ -419,7 +423,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
  count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
  divisor_override (int): If specified, it will be used as divisor in the averaging calculation, otherwise
- `kernel_size` will be used. Default: ``0`` .
+ `kernel_size` will be used. Default: ``0``, which means not specified.

  Returns:
  Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
@@ -456,7 +460,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  raise TypeError("For avg_pool2d, the input input_x must be tensor")

  if len(input_x.shape) != 4:
- raise ValueError("For avg_pool2d, input must have 4 dim, but got {}.".format(len(input_x.shape)))
+ raise ValueError(f"For avg_pool2d, input must have 4 dim, but got {len(input_x.shape)}.")

  kernel_size = _check_avgpool_2d_kernel_size(kernel_size)
  stride = _check_avgpool_2d_stride(stride)
@@ -527,7 +531,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  count_include_pad (bool, optional): If ``True`` , averaging calculation
  will include the zero-padding. Default: ``True`` .
  divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
- otherwise `kernel_size` will be used. Default: ``0`` .
+ otherwise `kernel_size` will be used. Default: ``0`` , which means not specified.

  Returns:
  Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type with `input_x`.
@@ -560,7 +564,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  raise TypeError("For avg_pool3d, the input input_x must be tensor")

  if len(input_x.shape) != 5:
- raise ValueError("For avg_pool3d, input must have 5 dim, but got {}.".format(len(input_x.shape)))
+ raise ValueError(f"For avg_pool3d, input must have 5 dim, but got {len(input_x.shape)}.")

  _check_avg_pool3d_padding(padding)

@@ -637,21 +641,21 @@ def adaptive_max_pool1d(input, output_size):
  x_dtype = _get_cache_prim(P.DType)()(input)

  if len(x_in_shape) != 3:
- raise ValueError("For adaptive_max_pool1d input must have 3 dim, but got {}.".format(len(x_in_shape)))
+ raise ValueError(f"For adaptive_max_pool1d input must have 3 dim, but got {len(x_in_shape)}.")
  if x_in_shape[2] < output_size:
- raise ValueError("For adaptive_max_pool1d input's last dimension must be greater or equal to "
- "output size {}, but got {}.".format(output_size, x_in_shape[2]))
+ raise ValueError(f"For adaptive_max_pool1d input's last dimension must be greater or equal to "
+ f"output size {output_size}, but got {x_in_shape[2]}.")
  if x_in_shape[2] % output_size != 0:
- raise ValueError("For adaptive_max_pool1d input's last dimension must be divisible by "
- "output size {}, but got {}.".format(output_size, x_in_shape[2]))
+ raise ValueError(f"For adaptive_max_pool1d input's last dimension must be divisible by "
+ f"output size {output_size}, but got {x_in_shape[2]}.")
  if is_ascend_backend():
  if x_dtype not in [mstype.float16]:
- raise TypeError("For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
- "but got {}.".format(x_dtype))
+ raise TypeError(f"For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
+ f"but got {x_dtype}.")
  else:
  if x_dtype not in [mstype.float16, mstype.float32]:
- raise TypeError("For adaptive_max_pool1d, the input dtype must be float16 or float32, "
- "but got {}.".format(x_dtype))
+ raise TypeError(f"For adaptive_max_pool1d, the input dtype must be float16 or float32, "
+ f"but got {x_dtype}.")

  expand_ = _get_cache_prim(P.ExpandDims)()
  squeeze_ = _get_cache_prim(P.Squeeze)(2)
@@ -1147,7 +1151,7 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
  return out


- def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
+ def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None, reduction='mean'):
  r"""
  Adds sigmoid activation function to input `logits`, and uses the given logits to compute binary cross entropy
  between the logits and the label.
@@ -1177,7 +1181,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio

  This operator will multiply the output by the corresponding weight.
  The tensor :math:`weight` assigns different weights to each piece of data in the batch,
- and the tensor :math:`pos_weight` adds corresponding weights to the positive examples of each category.
+ and the tensor :math:`pos\_weight` adds corresponding weights to the positive examples of each category.

  In addition, it can trade off recall and precision by adding weights to positive examples.
  In the case of multi-label classification the loss can be described as:
@@ -1196,17 +1200,21 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
  logits (Tensor): Input logits. Data type must be float16 or float32.
  label (Tensor): Ground truth label, has the same shape as `logits`.
  Data type must be float16 or float32.
- weight (Tensor): A rescaling weight applied to the loss of each batch element. It can be
+ weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. It can be
  broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
- pos_weight (Tensor): A weight of positive examples. Must be a vector with length equal to the
+ Default: ``None``, `weight` is a Tensor whose value is ``1``.
+ pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
  number of classes. It can be broadcast to a tensor with shape of `logits`.
- Data type must be float16 or float32.
- reduction (str): Type of reduction to be applied to loss. The optional values
- are ``'mean'`` , ``'sum'`` , and ``'none'`` ,
- not case sensitive. If ``'none'`` , do not perform reduction. Default: ``'mean'`` .
+ Data type must be float16 or float32. Default: ``None``, `pos_weight` is a Tensor whose value is ``1``.
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Returns:
- Tensor or Scalar, if `reduction` is 'none', it's a tensor with the same shape and type as input `logits`.
+ Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
  Otherwise, the output is a scalar.

  Raises:
@@ -1214,7 +1222,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
  TypeError: If data type of input `logits`, `label`, `weight`, `pos_weight` is neither float16 nor float32.
  TypeError: If data type of input `reduction` is not string.
  ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.

  Supported Platforms:
  ``Ascend`` ``GPU`` ``CPU``
@@ -1232,10 +1240,15 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
  0.3463612
  """

+ if weight is None:
+ weight = ops.ones_like(logits)
+ if pos_weight is None:
+ pos_weight = ops.ones_like(logits)
  bce_with_logits_loss_op = _get_cache_prim(NN_OPS.BCEWithLogitsLoss)(reduction)
  return bce_with_logits_loss_op(logits, label, weight, pos_weight)


+ @_function_forbid_reuse
  def dropout(input, p=0.5, training=True, seed=None):
  r"""
  During training, randomly zeroes some of the elements of the input tensor
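The hunk above makes `weight` and `pos_weight` optional; when omitted they fall back to all-ones tensors via `ops.ones_like`. A minimal sketch of the relaxed call under the 2.2.11 signature (tensor values are illustrative only, not taken from the package):

import numpy as np
import mindspore
from mindspore import Tensor, ops

logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]), mindspore.float32)
label = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]), mindspore.float32)
# weight and pos_weight can now be omitted; both default to tensors of ones
loss = ops.binary_cross_entropy_with_logits(logits, label, reduction='mean')
print(loss)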
@@ -1275,7 +1288,9 @@ def dropout(input, p=0.5, training=True, seed=None):
  return input
  keep_prob = 1 - p
  seed0, seed1 = _get_seed(seed, "dropout")
- out, _ = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)(input)
+ dropout_op = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)
+ dropout_op = _set_prim_op_user_data(dropout_op, "random_cache", False)
+ out, _ = dropout_op(input)
  return out


@@ -1820,7 +1835,7 @@ def kl_div(logits, labels, reduction='mean'):
  Its value must be one of ``'none'`` , ``'mean'`` , ``'batchmean'`` or ``'sum'`` . Default: ``'mean'`` .

  Returns:
- Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `logits`.
+ Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
  Otherwise, it is a scalar.

  Raises:
@@ -2220,7 +2235,9 @@ def interpolate(input,
  One and only one of size and scale_factor can be set to None. Default: ``None`` .
  mode (str): The sampling algorithm.
  One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
- 'area', 'nearest-exact'(3D and 4D). Default: ``"nearest"`` .
+ 'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
+ knows issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
+
  align_corners (bool): If True, rescale input by :math:`(new\_height - 1) / (height - 1)`, which exactly
  aligns the corners of data and resized data. If False, rescale by :math:`new\_height / height`.
  Default: ``None`` .
@@ -2568,10 +2585,12 @@ def soft_margin_loss(input, target, reduction='mean'):
  Args:
  input (Tensor): Predict data. Data type must be float16 or float32.
  target (Tensor): Ground truth data, with the same type and shape as `logits`.
- reduction (str, optional): Implements the reduction method to the output with ``'none'`` , ``'mean'`` ,
- or ``'sum'`` ,
- respectively indicate that no calculation is specified, that the mean is used, and that is calculated
- using summation. Default: ``'mean'`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Outputs:
  Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `logits`.
@@ -2751,6 +2770,55 @@ def soft_shrink(input, lambd=0.5):
  return soft_shrink_op(input)


+ def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
+ r"""
+ Applies softplus function to `input` element-wise.
+
+ The softplus function is shown as follows, x is the element of `input` :
+
+ .. math::
+
+ \text{output} = \frac{1}{beta}\log(1 + \exp(\text{beta * x}))
+
+ When :math:`input * beta > threshold`, the implementation converts to the linear function
+ to ensure numerical stability.
+
+ Args:
+ input (Tensor) - Tensor of any dimension.
+ Supported dtypes:
+
+ - GPU/CPU: float16, float32, float64.
+ - Ascend: float16, float32.
+
+ beta (int, optional) - The :math:`\beta` value in softplus function. Default: ``1`` .
+ threshold (int, optional) - When :math:`input * beta > threshold`, converting softplus to a linear function.
+ Default: ``20`` .
+
+ Returns:
+ Tensor, with the same type and shape as the `input` .
+
+ Raises:
+ TypeError: If `input` is not a Tensor.
+ TypeError: If the dtype of `input` is not float16, float32 or float64.
+
+ Supported Platforms:
+ ``Ascend`` ``GPU`` ``CPU``
+
+ Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
+ >>> input = Tensor(np.array([0.1, 0.2, 30, 25]), mindspore.float32)
+ >>> output = ops.softplus(input)
+ >>> print(output)
+ [0.7443967 0.79813886 30. 25.]
+ """
+ softplus_op = _get_cache_prim(P.Softplus)()
+ scaling_input = beta * input
+ op_output = (1 / beta) * softplus_op(scaling_input)
+ return ops.select(input * beta > threshold, input, op_output)
+
+
  def silu(x):
  r"""
  Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
@@ -2860,7 +2928,7 @@ def sigmoid(input):
  >>> print(output)
  [0.7310586 0.880797 0.95257413 0.98201376 0.9933072 ]
  """
- return sigmoid_(input)
+ return _get_cache_prim(NN_OPS.Sigmoid)()(input)


  def logsigmoid(x):
@@ -2946,11 +3014,19 @@ def dense(input, weight, bias=None):
  _check_is_tensor("bias", bias, "dense")
  weight = ops.t(weight)
  input = ops.matmul(input, weight)
+ input_shape = input.shape
  if bias is not None:
  input = input + bias
+ _check_dense_add_bias_shape(input_shape, input.shape, bias.shape)
  return input


+ def _check_dense_add_bias_shape(input_shape, output_shape, bias_shape):
+ """Check that the output has the correct shape after adding bias."""
+ if input_shape != output_shape:
+ raise ValueError(f"For dense, the bias shape {bias_shape} does not match the input shape {input_shape}.")
+
+
  @_primexpr
  def check_dense_inputs_same_shape(input1_shape, input2_shape, prim_name=None):
  """check bidense input Tensors' shape"""
@@ -2965,7 +3041,10 @@ def bidense(input1, input2, weight, bias=None):
  Applies bilinear dense connected layer for `input1` and `input2`. The bilinear dense function is defined as:

  .. math::
- output = input1^{T} weight input2 + bias
+ output = x_{1}^{T}Ax_{2} + b
+
+ :math:`x_{1}` represents `input1` , :math:`x_{2}` represents `input2` , :math:`A` represents `weight` ,
+ :math:`b` represents `bias` .

  .. warning::
  This is an experimental API that is subject to change or deletion.
@@ -3391,7 +3470,9 @@ def relu6(x):
  It returns :math:`\min(\max(0,x), 6)` element-wise.

  Args:
- x (Tensor): Input Tensor of float16 or float32 data type.
+ x (Tensor): Tensor of shape :math:`(N, *)`,
+ where :math:`*` means any number of additional dimensions.
+ Data type must be float16, float32.

  Returns:
  Tensor, with the same dtype and shape as the `x`.
@@ -3528,6 +3609,9 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
  _lower = Tensor(lower, mstype.float32)
  _upper = Tensor(upper, mstype.float32)
  _size = input.shape
+ if ops.is_sequence_value_unknown(_size):
+ dyn_shape = _get_cache_prim(P.TensorShape)()
+ _size = dyn_shape(input)
  sign_matrix = _get_cache_prim(P.Sign)()(input)
  negative_filter = sign_matrix.clip(None, 0)
  positive_filter = sign_matrix.clip(0, None)
@@ -3615,11 +3699,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3615
3699
  l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
3616
3700
  \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
3617
3701
 
3618
- where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight,
3619
- N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
3620
- classes.
3702
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
3703
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
3621
3704
 
3622
- If reduction is not 'none' (default 'mean'), then
3705
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3623
3706
 
3624
3707
  .. math::
3625
3708
 
@@ -3638,11 +3721,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3638
3721
  \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
3639
3722
  l_n = - \sum_{c=1}^C w_c \log \frac{\exp(x_{n,c})}{\sum_{i=1}^C \exp(x_{n,i})} y_{n,c}
3640
3723
 
3641
- where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight,
3642
- N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
3643
- classes.
3724
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
3725
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
3644
3726
 
3645
- If reduction is not 'none' (default 'mean'), then
3727
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3646
3728
 
3647
3729
  .. math::
3648
3730
 
@@ -3658,16 +3740,19 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3658
3740
  in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
3659
3741
  `input` is expected to be log-probabilities, data type must be float16 or float32.
3660
3742
  target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
3661
- :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32.
3662
- For probabilities, tensor of shape :math:`(C,)` :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` ,
3663
- data type must be float16 or float32.
3743
+ :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32. For probabilities, tensor of shape :math:`(C,)` ,
3744
+ :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32.
3664
3745
  weight (Tensor): A rescaling weight applied to the loss of each batch element.
3665
- If not None, the shape is :math:`(C,)`,
3666
- data type must be float16 or float32. Default: ``None`` .
3746
+ If not None, the shape is :math:`(C,)`, data type must be float16 or float32. Default: ``None`` .
3667
3747
  ignore_index (int): Specifies a target value that is ignored
3668
3748
  and does not contribute to the input gradient. Default: ``-100`` .
3669
- reduction (str): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , or ``'sum'`` .
3670
- Default: ``'mean'`` .
3749
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3750
+ ``'sum'`` . Default: ``'mean'`` .
3751
+
3752
+ - ``'none'``: no reduction will be applied.
3753
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
3754
+ - ``'sum'``: the output elements will be summed.
3755
+
3671
3756
  label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
3672
3757
  from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .
3673
3758
 
@@ -3678,17 +3763,16 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3678
3763
  ``Ascend`` ``GPU`` ``CPU``
3679
3764
 
3680
3765
  Examples:
3681
- >>> import mindspore
3766
+ >>> import mindspore as ms
3682
3767
  >>> import numpy as np
3683
- >>> from mindspore import Tensor, ops
3684
3768
  >>> # Case 1: Indices labels
3685
- >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3686
- >>> target = mindspore.Tensor(np.array([1, 0, 4]), mindspore.int32)
3687
- >>> output = ops.cross_entropy(inputs, target)
3769
+ >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
3770
+ >>> target = ms.Tensor(np.array([1, 0, 4]), ms.int32)
3771
+ >>> output = ms.ops.cross_entropy(inputs, target)
3688
3772
  >>> # Case 2: Probability labels
3689
- >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3690
- >>> target = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3691
- >>> output = ops.cross_entropy(inputs, target)
3773
+ >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
3774
+ >>> target = ms.Tensor(np.random.randn(3, 5), ms.float32)
3775
+ >>> output = ms.ops.cross_entropy(inputs, target)
3692
3776
  """
3693
3777
  _check_is_tensor('input', input, "cross_entropy_loss")
3694
3778
  _check_is_tensor('target', target, "cross_entropy_loss")
@@ -3743,7 +3827,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3743
3827
  N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
3744
3828
  classes.
3745
3829
 
3746
- If reduction is not 'none' (default 'mean'), then
3830
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3747
3831
 
3748
3832
  .. math::
3749
3833
 
@@ -3763,8 +3847,13 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3763
3847
  The data type must be float16 or float32. Default: ``None`` .
3764
3848
  ignore_index (int): Specifies a target value that is ignored
3765
3849
  and does not contribute to the input gradient. Default: ``-100`` .
3766
- reduction (str): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , or ``'sum'`` .
3767
- Default: ``'mean'`` .
3850
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3851
+ ``'sum'`` . Default: ``'mean'`` .
3852
+
3853
+ - ``'none'``: no reduction will be applied.
3854
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
3855
+ - ``'sum'``: the output elements will be summed.
3856
+
3768
3857
  label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
3769
3858
  from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .
3770
3859
 
@@ -3858,7 +3947,7 @@ def l1_loss(input, target, reduction='mean'):
3858
3947
  r"""
3859
3948
  Calculate the mean absolute error between the `input` value and the `target` value.
3860
3949
 
3861
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to "none" ,
3950
+ Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to ``"none"``,
3862
3951
  then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
3863
3952
 
3864
3953
  The formula is as follows:
@@ -3881,18 +3970,21 @@ def l1_loss(input, target, reduction='mean'):
3881
3970
  input (Tensor): Predicted value, Tensor of any dimension.
3882
3971
  target (Tensor): Target value, usually has the same shape as the `input`.
3883
3972
  If `input` and `target` have different shape, make sure they can broadcast to each other.
3884
- reduction (str, optional): Type of reduction to be applied to loss.
3885
- The optional value is ``"mean"`` , ``"sum"`` or
3886
- ``"none"`` . Default: ``'mean'`` .
3973
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3974
+ ``'sum'`` . Default: ``'mean'`` .
3975
+
3976
+ - ``'none'``: no reduction will be applied.
3977
+ - ``'mean'``: compute and return the mean of elements in the output.
3978
+ - ``'sum'``: the output elements will be summed.
3887
3979
 
3888
3980
  Returns:
3889
- Tensor or Scalar, if `reduction` is "none", return a Tensor with same shape and dtype as `input`.
3981
+ Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
3890
3982
  Otherwise, a scalar value will be returned.
3891
3983
 
3892
3984
  Raises:
3893
3985
  TypeError: If `input` is not a Tensor.
3894
3986
  TypeError: If `target` is not a Tensor.
3895
- ValueError: If `reduction` is not one of "none", "mean" or "sum".
3987
+ ValueError: If `reduction` is not one of ``"none"``, ``"mean"`` or ``"sum"``.
3896
3988
 
3897
3989
  Supported Platforms:
3898
3990
  ``Ascend`` ``GPU`` ``CPU``
@@ -3948,16 +4040,20 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
3948
4040
  target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
3949
4041
  beta (float): A parameter used to control the point where the function will change between
3950
4042
  L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
3951
- reduction (str): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` or ``'sum'`` .
3952
- Default: ``'none'`` .
4043
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4044
+ ``'sum'`` . Default: ``'none'`` .
4045
+
4046
+ - ``'none'``: no reduction will be applied.
4047
+ - ``'mean'``: compute and return the mean of elements in the output.
4048
+ - ``'sum'``: the output elements will be summed.
3953
4049
 
3954
4050
  Returns:
3955
- Tensor, if `reduction` is 'none', then output is a tensor with the same shape as `input`.
4051
+ Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `input`.
3956
4052
  Otherwise, the shape of output tensor is :math:`(1,)`.
3957
4053
 
3958
4054
  Raises:
3959
4055
  TypeError: If `beta` is not a float.
3960
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
4056
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
3961
4057
  TypeError: If dtype of `input` or `target` is not one of float16, float32, float64.
3962
4058
  ValueError: If `beta` is less than or equal to 0.
3963
4059
  ValueError: If shape of `input` is not the same as `target`.
@@ -4072,6 +4168,7 @@ def leaky_relu(input, alpha=0.2):
4072
4168
  select_op = _get_cache_prim(P.Maximum)()
4073
4169
  if alpha > 1:
4074
4170
  select_op = _get_cache_prim(P.Minimum)()
4171
+ alpha = _get_cache_prim(P.Cast)()(F.scalar_to_tensor(alpha), input.dtype)
4075
4172
  return select_op(alpha * input, input)
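The added cast keeps `alpha` in the input's dtype before the element-wise product, so the selected Maximum/Minimum primitive receives matching operand types. A small usage sketch (illustrative values only):

import numpy as np
import mindspore as ms
from mindspore import ops

x = ms.Tensor(np.array([-1.0, 0.0, 2.0]), ms.float16)
y = ops.leaky_relu(x, alpha=0.01)
print(y.dtype)   # Float16 -- alpha now follows the input dtype instead of promoting the result
print(y)         # approximately [-0.01  0.  2.]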
4076
4173
 
4077
4174
 
@@ -4158,6 +4255,10 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
4158
4255
  r"""
4159
4256
  Local Response Normalization.
4160
4257
 
4258
+ .. warning::
4259
+ lrn is deprecated on Ascend due to potential accuracy problems. It's recommended to use other
4260
+ normalization methods, e.g. :class:`mindspore.ops.batch_norm`.
4261
+
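The recommended alternative is not a drop-in replacement: it normalizes per channel with running statistics rather than over a local response window. A hedged migration sketch, with illustrative statistics and the keyword defaults assumed for `ops.batch_norm`:

import numpy as np
import mindspore as ms
from mindspore import ops

x = ms.Tensor(np.random.randn(2, 3, 4, 4), ms.float32)   # (N, C, H, W)
running_mean = ms.Tensor(np.zeros(3), ms.float32)
running_var = ms.Tensor(np.ones(3), ms.float32)
gamma = ms.Tensor(np.ones(3), ms.float32)
beta = ms.Tensor(np.zeros(3), ms.float32)

out = ops.batch_norm(x, running_mean, running_var, gamma, beta,
                     training=False, momentum=0.1, eps=1e-5)
print(out.shape)   # (2, 3, 4, 4)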
4161
4262
  .. math::
4162
4263
 
4163
4264
  b_{c} = a_{c}\left(k + \frac{\alpha}{n}
@@ -4186,7 +4287,7 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
4186
4287
  TypeError: If `x` is not a Tensor.
4187
4288
 
4188
4289
  Supported Platforms:
4189
- ``Ascend`` ``GPU`` ``CPU``
4290
+ ``GPU`` ``CPU``
4190
4291
 
4191
4292
  Examples:
4192
4293
  >>> import mindspore
@@ -4219,7 +4320,11 @@ def mish(x):
4219
4320
  <https://arxiv.org/abs/1908.08681>`_.
4220
4321
 
4221
4322
  Args:
4222
- x (Tensor): The input Tensor with float16, float32 or float64 data type.
4323
+ x (Tensor): The input Tensor.
4324
+ Supported dtypes:
4325
+
4326
+ - GPU/CPU: float16, float32, float64.
4327
+ - Ascend: float16, float32.
4223
4328
 
4224
4329
  Returns:
4225
4330
  Tensor, with the same type and shape as the `x`.
@@ -4320,10 +4425,40 @@ def _check_type_and_shape_same(param_name1, input_data1, param_name2, input_data
4320
4425
 
4321
4426
 
4322
4427
  def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
4323
- """
4428
+ r"""
4324
4429
  MarginRankingLoss creates a criterion that measures the loss.
4325
4430
 
4326
- For details, please refer to :class:`mindspore.nn.MarginRankingLoss`.
4431
+ Given two tensors :math:`input1`, :math:`input2` and a Tensor label :math:`target` with values 1 or -1,
4432
+ the operation is as follows:
4433
+
4434
+ .. math::
4435
+ \text{loss}(input1, input2, target) = \max(0, -target * (input1 - input2) + \text{margin})
4436
+
4437
+ Args:
4438
+ input1 (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means any number of additional dimensions.
4439
+ input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
4440
+ target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
4441
+ :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_2, x_3, ..., x_R)`.
4442
+ margin (float, optional): Specify the adjustment factor of the operation. Default: ``0.0`` .
4443
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4444
+ ``'sum'`` . Default: ``'mean'`` .
4445
+
4446
+ - ``'none'``: no reduction will be applied.
4447
+ - ``'mean'``: compute and return the mean of elements in the output.
4448
+ - ``'sum'``: the output elements will be summed.
4449
+
4450
+ Returns:
4451
+ Tensor or Scalar. If `reduction` is ``"none"``, its shape is the same as `target`.
4452
+ Otherwise, a scalar value will be returned.
4453
+
4454
+ Raises:
4455
+ TypeError: If `margin` is not a float.
4456
+ TypeError: If `input1`, `input2` or `target` is not a Tensor.
4457
+ TypeError: If the types of `input1` and `input2` are inconsistent.
4458
+ TypeError: If the types of `input1` and `target` are inconsistent.
4459
+ ValueError: If the shape of `input1` and `input2` are inconsistent.
4460
+ ValueError: If the shape of `input1` and `target` are inconsistent.
4461
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` , ``'sum'``.
4327
4462
 
4328
4463
  Supported Platforms:
4329
4464
  ``Ascend`` ``GPU`` ``CPU``
@@ -4334,7 +4469,7 @@ def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
4334
4469
  >>> import numpy as np
4335
4470
  >>> input1 = Tensor(np.array([0.3864, -2.4093, -1.4076]), ms.float32)
4336
4471
  >>> input2 = Tensor(np.array([-0.6012, -1.6681, 1.2928]), ms.float32)
4337
- >>> target = sign(Tensor(np.array([-2, -2, 3]), ms.float32))
4472
+ >>> target = ops.Sign()(Tensor(np.array([-2, -2, 3]), ms.float32))
4338
4473
  >>> output = ops.margin_ranking_loss(input1, input2, target)
4339
4474
  >>> print(output)
4340
4475
  1.2293333
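The printed value can be reproduced from the formula above with plain NumPy (an independent check, not part of the API):

import numpy as np

input1 = np.array([0.3864, -2.4093, -1.4076])
input2 = np.array([-0.6012, -1.6681, 1.2928])
target = np.sign(np.array([-2.0, -2.0, 3.0]))             # [-1, -1, 1]
loss = np.maximum(0, -target * (input1 - input2) + 0.0)   # margin = 0.0
print(loss.mean())                                        # ~1.2293, matching the output above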
@@ -4375,17 +4510,20 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
4375
4510
  target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
4376
4511
  :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_3, x_4, ..., x_R)`.
4377
4512
  margin (float, optional): Should be in [-1.0, 1.0]. Default: 0.0.
4378
- reduction (str, optional): Specifies which reduction to be applied to the output. It must be one of
4379
- ``"none"`` , ``"mean"`` , and ``"sum"`` ,
4380
- meaning no reduction, reduce mean and sum on output, respectively. Default: ``"mean"`` .
4513
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4514
+ ``'sum'`` . Default: ``'mean'`` .
4515
+
4516
+ - ``'none'``: no reduction will be applied.
4517
+ - ``'mean'``: compute and return the mean of elements in the output.
4518
+ - ``'sum'``: the output elements will be summed.
4381
4519
 
4382
4520
  Returns:
4383
- Tensor or Scalar, if `reduction` is "none", its shape is the same as `target`.
4521
+ Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `target`.
4384
4522
  Otherwise, a scalar value will be returned.
4385
4523
 
4386
4524
  Raises:
4387
4525
  TypeError: If `margin` is not a float.
4388
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
4526
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4389
4527
  ValueError: If `margin` is not in range [-1, 1].
4390
4528
 
4391
4529
  Supported Platforms:
@@ -4471,6 +4609,19 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
4471
4609
 
4472
4610
  - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`.
4473
4611
  It has the same data type as `x`.
4612
+
4613
+ .. math::
4614
+ D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
4615
+ (\text{kernel_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
4616
+
4617
+ .. math::
4618
+ H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
4619
+ (\text{kernel_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
4620
+
4621
+ .. math::
4622
+ W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
4623
+ (\text{kernel_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
4624
+
4474
4625
- **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be returned
4475
4626
  only when `return_indices` is ``True`` .
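The three output-size formulas above share one pattern; a small helper evaluating it per spatial axis (a sketch with scalar arguments, assuming the symmetric padding used in the formulas):

import math

def pool_out_dim(size_in, kernel, stride, padding, dilation):
    """Output length of one spatial axis per the max_pool3d formulas above."""
    return math.floor((size_in + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1)

print(pool_out_dim(16, 3, 2, 1, 1))   # e.g. D_in=16, kernel=3, stride=2, padding=1 -> 8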
4476
4627
 
@@ -4529,14 +4680,24 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4529
4680
 
4530
4681
  Args:
4531
4682
  input (Tensor): input with shape of :math:`(N, C, H_{in}, W_{in})` (4-D case) or :math:`(N, C, D_{in},
4532
- H_{in}, W_{in})` (5-D case) and dtype of float16, float32 or float64.
4683
+ H_{in}, W_{in})` (5-D case) and dtype of float32 or float64.
4533
4684
  grid (Tensor): flow-field with shape of :math:`(N, H_{out}, W_{out}, 2)` (4-D case) or :math:`(N, D_{out},
4534
4685
  H_{out}, W_{out}, 3)` (5-D case) and same dtype as `input`.
4535
4686
  mode (str): An optional string specifying the interpolation method. The optional values are
4536
- 'bilinear', 'nearest'. Default: ``'bilinear'`` . Note: `bicubic` is not supported yet. When
4687
+ ``'bilinear'``, ``'nearest'``. Default: ``'bilinear'`` . Note: `bicubic` is not supported yet. When
4537
4688
  `mode="bilinear"` and the input is 5-D, the interpolation mode used internally will actually
4538
4689
be trilinear. However, when the input is 4-D, the interpolation mode will legitimately be bilinear.
4539
4690
  Default: ``'bilinear'`` .
4691
+
4692
+ - ``'nearest'``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
4693
+ nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
4694
+ - ``'bilinear'``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
4695
+ pixels, computed using bilinear interpolation. This method produces smoother results compared
4696
+ to nearest neighbor interpolation.
4697
+ - ``'trilinear'``: Trilinear interpolation. This is an extension of bilinear interpolation to 3D data.
4698
+ It performs bilinear interpolation in the two spatial dimensions and linear interpolation along
4699
+ the third dimension. It is commonly used for volume or 3D image interpolation.
4700
+
4540
4701
  padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
4541
4702
  "reflection". Default: ``'zeros'`` .
4542
4703
  align_corners (bool): An optional bool. If set to `True`, the extrema (-1 and 1) are considered as referring to
@@ -4617,10 +4778,13 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4617
4778
  input_lengths (Union(tuple, Tensor)): Lengths of the input. A tuple or Tensor of shape(N).
4618
4779
  target_lengths (Union(tuple, Tensor)): Lengths of the target. A tuple or Tensor of shape(N).
4619
4780
  blank (int, optional): The blank label. Default: ``0`` .
4620
- reduction (str, optional): Implements the reduction method to the output with
4621
- ``'none'`` , ``'mean'`` , or ``'sum'`` ,
4622
- respectively indicate that no calculation is specified, that the mean is used, and that is calculated
4623
- using summation. Default: ``"mean"`` .
4781
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4782
+ ``'sum'`` . Default: ``'mean'`` .
4783
+
4784
+ - ``'none'``: no reduction will be applied.
4785
+ - ``'mean'``: compute and return the mean of elements in the output.
4786
+ - ``'sum'``: the output elements will be summed.
4787
+
4624
4788
  zero_infinity (bool, optional): Whether to set infinite loss and correlation gradient to 0. Default: ``False`` .
4625
4789
 
4626
4790
  Returns:
@@ -4704,8 +4868,12 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4704
4868
  full (bool, optional): Include the constant term in the loss calculation. When :math:`full=True`,
4705
4869
  the constant term will be :math:`const = 0.5*log(2\pi)`. Default: ``False``.
4706
4870
  eps (float, optional): Used to improve the stability of log function must be greater than 0. Default: ``1e-6`` .
4707
- reduction (str, optional): Apply specific reduction method to the
4708
- output: ``"none"``, ``"mean"``, or ``"sum"``. Default: ``'mean'``.
4871
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4872
+ ``'sum'`` . Default: ``'mean'`` .
4873
+
4874
+ - ``'none'``: no reduction will be applied.
4875
+ - ``'mean'``: compute and return the mean of elements in the output.
4876
+ - ``'sum'``: the output elements will be summed.
4709
4877
 
4710
4878
  Returns:
4711
4879
  Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
@@ -4722,8 +4890,7 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4722
4890
 
4723
4891
  Examples:
4724
4892
  >>> import numpy as np
4725
- >>> from mindspore import Tensor
4726
- >>> import mindspore.ops as ops
4893
+ >>> from mindspore import Tensor, ops
4727
4894
  >>> import mindspore.common.dtype as mstype
4728
4895
  >>> arr1 = np.arange(8).reshape((4, 2))
4729
4896
  >>> arr2 = np.array([2, 3, 1, 4, 6, 4, 4, 9]).reshape((4, 2))
@@ -4831,9 +4998,12 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4831
4998
  Has the same shape as `inputs`, contains -1 or 1.
4832
4999
  margin (float, int): Threshold defined by Hinge Embedding Loss :math:`margin`.
4833
5000
  Represented as :math:`\Delta` in the formula. Default: ``1.0`` .
4834
- reduction (str): Specify the computing method to be applied to the outputs:
4835
- ``'none'`` , ``'mean'`` , or ``'sum'`` .
4836
- Default: ``'mean'`` .
5001
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5002
+ ``'sum'`` . Default: ``'mean'`` .
5003
+
5004
+ - ``'none'``: no reduction will be applied.
5005
+ - ``'mean'``: compute and return the mean of elements in the output.
5006
+ - ``'sum'``: the output elements will be summed.
4837
5007
 
4838
5008
  Returns:
4839
5009
  Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
@@ -4843,7 +5013,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4843
5013
  TypeError: If `targets` is not a Tensor.
4844
5014
  TypeError: If `margin` is not a float or int.
4845
5015
  ValueError: If `targets` does not have the same shape as `inputs` or they could not broadcast to each other.
4846
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
5016
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4847
5017
 
4848
5018
  Supported Platforms:
4849
5019
  ``Ascend`` ``GPU`` ``CPU``
@@ -4889,6 +5059,9 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
4889
5059
  r"""
4890
5060
  Performs greedy decoding on the logits given in inputs.
4891
5061
 
5062
+ Note:
5063
+ On Ascend, `merge_repeated` cannot be set to ``False``.
5064
+
4892
5065
  Args:
4893
5066
  inputs (Tensor): The input Tensor must be a 3-D tensor whose shape is
4894
5067
  :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes,
@@ -5068,74 +5241,87 @@ def _check_conv_iterable_lengths(iterable, dim, iter_name):
5068
5241
 
5069
5242
  def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5070
5243
  r"""
5071
- Applies a 1D convolution over an input tensor.
5072
- The input tensor is typically of shape :math:`(N, C_{in}, W_{in})`,
5073
- where :math:`N` is batch size, :math:`C_{in}` is channel number, :math:`W` is width, :math:`X_i` is
5074
- the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
5075
- For each batch of shape :math:`(C_{in}, W_{in})`, the formula is defined as:
5244
+ Applies a 1D convolution over an input tensor. The input Tensor is typically
5245
+ of shape :math:`(N, C_{in}, L_{in})`,
5246
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is input sequence width.
5247
+
5248
+ The output is calculated based on formula:
5076
5249
 
5077
5250
  .. math::
5078
5251
 
5079
- out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{j}, X_i) + b_j,
5252
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5253
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5254
+
5255
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5256
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5257
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5258
+
5259
+ Here are the indices' meanings:
5260
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5261
+
5262
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5263
+ output channels, which is also equal to the number of kernels.
5080
5264
 
5081
- where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ operator,
5082
- :math:`C_{in}` is the input channel number, :math:`j` ranges
5083
- from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
5084
- filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{j}` is a slice
5085
- of kernel, and it has shape :math:`(\text{kernal_size})`, where :math:`\text{kernel_size}` is the width of
5086
- the convolution kernel. The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
5087
- where `groups` is the group number to split the input in the channel dimension.
5265
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5266
+ input channels, which is also equal to the number of channels in the convolutional kernels.
5088
5267
 
5089
- If the `pad_mode` is set to be "valid", the output width will be :math:`\left \lfloor{
5090
- 1 + \frac{W_{in} + \text{padding[0]} - \text{kernel_size} - (\text{kernel_size} - 1) \times(\text{dilation} - 1)}
5091
- {\text { stride }}} \right \rfloor`.
5268
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5269
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5270
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5271
+ channel in the :math:`i`-th batch of the input feature map.
5092
5272
 
5093
- where :math:`dilation` is spacing between kernel elements, :math:`stride` is The step length of each step,
5094
- :math:`padding` is zero-padding added to both sides of the input.
5095
- For output width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv1d
5096
- <https://www.mindspore.cn/docs/en/r2.1/api_python/nn/mindspore.nn.Conv2d.html>`_.
5273
+ The shape of the convolutional kernel is given by :math:`(kernel\_size)`,
5274
+ where :math:`kernel\_size` is the width of the kernel.
5275
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5276
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
5277
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5097
5278
 
5098
- The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
5099
- <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
5100
- `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5279
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5280
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
5281
+ and `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
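As a concrete reading of the cross-correlation formula above, a plain NumPy sketch for the single-group, stride-1, unpadded case (illustrative only, not the operator's implementation):

import numpy as np

def conv1d_ref(x, weight, bias=None):
    """x: (N, C_in, L_in), weight: (C_out, C_in, K) -> (N, C_out, L_in - K + 1)."""
    n, c_in, l_in = x.shape
    c_out, _, k = weight.shape
    out = np.zeros((n, c_out, l_in - k + 1))
    for i in range(n):              # batch index
        for j in range(c_out):      # output channel / kernel index
            for pos in range(out.shape[2]):
                # sum over input channels and kernel taps (cross-correlation: no flip)
                out[i, j, pos] = np.sum(weight[j] * x[i, :, pos:pos + k])
    if bias is not None:
        out += bias.reshape(1, -1, 1)
    return out

x = np.random.randn(2, 3, 8)
w = np.random.randn(4, 3, 3)
print(conv1d_ref(x, w).shape)   # (2, 4, 6)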
5101
5282
 
5102
5283
  Note:
5103
5284
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
5104
5285
  That is, when `groups>1`, condition `C_{in}` = `C_{out}` = `groups` must be satisfied.
5105
5286
 
5106
5287
  Args:
5107
- input (Tensor): Tensor of shape :math:`(N, C_{in}, W_{in})`.
5108
- weight (Tensor): Tensor of shape
5109
- :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`, then the size of kernel is
5110
- :math:`(\text{kernel_size})`.
5111
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
5288
+ input (Tensor): Input Tensor of shape :math:`(N, C_{in}, L_{in})`.
5289
+ weight (Tensor): The convolutional kernel value, it should have shape
5290
+ :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`.
5291
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5112
5292
  When bias is None, zeros will be used. Default: ``None`` .
5113
5293
  stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number or a tuple of one int
5114
- that represents width of movement. Default: 1.
5294
+ that represents width of movement. Default: ``1``.
5115
5295
  pad_mode (str, optional): Specifies padding mode. The optional values are
5116
5296
  ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
5117
5297
 
5118
- - same: Adopts the way of completion. The height and width of the output will be equal to
5298
+ - ``"same"``: Adopts the way of completion. The height and width of the output will be equal to
5119
5299
the input `x` divided by stride. The padding will be calculated evenly on the left and right where possible.
5120
5300
  Otherwise, the last extra padding will be calculated from the right side.
5121
5301
  If this mode is set, `padding` must be 0.
5122
5302
 
5123
- - valid: Adopts the way of discarding. The possible largest width of output will be returned
5303
+ - ``"valid"``: Adopts the way of discarding. The possible largest width of output will be returned
5124
5304
  without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.
5125
5305
 
5126
- - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the input
5306
+ - ``"pad"``: Implicit paddings on both sides of the input `x`.
5307
+ The number of `padding` will be padded to the input
5127
5308
  Tensor borders. `padding` must be greater than or equal to 0.
5128
- padding (Union(int, tuple[int], list[int]), optional): Implicit paddings on both sides of `input`, meaning the
5309
+ padding (Union(int, tuple[int], list[int]), optional): Specifies the amount of padding to apply on
5310
+ both sides of `input` when `pad_mode` is set to ``"pad"``. The
5129
5311
  paddings of left and right are the same, equal to padding or padding[0] when padding is a tuple of
5130
5312
  1 integer. Default: ``0`` .
5131
- dilation (Union(int, tuple[int]), optional): Gaps between kernel elements. The data type is int or a tuple of
5132
- 1 integer. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
5133
- there will be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater than or
5134
- equal to 1 and bounded by the width of `input`. Default: ``1`` .
5313
+ dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
5314
+ It can be a single int or a tuple of 1 integer.
5315
+ Assuming :math:`dilation=(d0,)`, the convolutional kernel samples the input with a
5316
+ spacing of :math:`d0-1` elements in the width direction.
5317
+ The value should be in the range [1, L].
5318
+ Default: ``1`` .
5135
5319
  groups (int, optional): Splits `input` into groups. Default: ``1`` .
5136
5320
 
5137
5321
  Returns:
5138
- Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, W_{out})`.
5322
+ Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
5323
+ To see how different pad modes affect the output shape, please refer to
5324
+ :class:`mindspore.nn.Conv1d` for more details.
5139
5325
 
5140
5326
  Raises:
5141
5327
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -5204,40 +5390,44 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5204
5390
 
5205
5391
  def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5206
5392
  r"""
5207
- Applies a 2D convolution over an input tensor.
5208
- The input tensor is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
5209
- where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is height, :math:`W` is width, :math:`X_i` is
5210
- the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
5211
- For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
5393
+ Applies a 2D convolution over an input tensor. The input tensor is typically of
5394
+ shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
5395
+ channel number, :math:`H` is feature height, :math:`W` is feature width.
5396
+
5397
+ The output is calculated based on formula:
5212
5398
 
5213
5399
  .. math::
5214
5400
 
5215
- out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
5401
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5402
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5403
+
5404
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5405
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5406
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5407
+
5408
+ Here are the indices' meanings:
5409
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5216
5410
 
5217
- where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ operator,
5218
- :math:`C_{in}` is the input channel number, :math:`j` ranges
5219
- from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
5220
- filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
5221
- of kernel, and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, where :math:`\text{
5222
- kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution kernel.
5223
- The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5224
- where `groups` is the group number to split the input in the channel dimension.
5411
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5412
+ output channels, which is also equal to the number of kernels.
5225
5413
 
5226
- If the `pad_mode` is set to be "valid", the output height and width will be :math:`\left \lfloor{
5227
- 1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
5228
- (\text{kernel_size[0]} - 1) \times(\text{dilation[0]} - 1)} {\text { stride[0] }}} \right \rfloor` and
5414
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5415
+ input channels, which is also equal to the number of channels in the convolutional kernels.
5229
5416
 
5230
- :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
5231
- (\text{kernel_size[1]} - 1) \times(\text{dilation[1]} - 1)} {\text { stride[1] }}} \right \rfloor` respectively.
5417
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5418
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5419
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5420
+ channel in the :math:`i`-th batch of the input feature map.
5232
5421
 
5233
- where :math:`dilation` is spacing between kernel elements, :math:`stride` is The step length of each step,
5234
- :math:`padding` is zero-padding added to both sides of the input.
5235
- For output height and width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv2d
5236
- <https://www.mindspore.cn/docs/en/r2.1/api_python/nn/mindspore.nn.Conv2d.html>`_.
5422
+ The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
5423
+ where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
5424
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5425
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5426
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5237
5427
 
5238
- The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
5239
- <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
5240
- `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5428
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5429
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
5430
+ `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.
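A short call sketch relating the kernel shape described above to the function signature (illustrative shapes; `pad_mode` left at its ``"valid"`` default):

import numpy as np
import mindspore as ms
from mindspore import ops

x = ms.Tensor(np.random.randn(1, 3, 8, 8), ms.float32)   # (N, C_in, H_in, W_in)
w = ms.Tensor(np.random.randn(6, 3, 3, 3), ms.float32)   # (C_out, C_in / groups, kH, kW)
out = ops.conv2d(x, w)                                   # "valid": 8 - 3 + 1 = 6 per axis
print(out.shape)                                         # (1, 6, 6, 6)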
5241
5431
 
5242
5432
  Note:
5243
5433
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
@@ -5248,7 +5438,7 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5248
5438
  weight (Tensor): Tensor of shape
5249
5439
  :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
5250
5440
  is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
5251
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
5441
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5252
5442
  When bias is ``None`` , zeros will be used. Default: ``None`` .
5253
5443
  stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
5254
5444
  the height and width of movement are both strides, or a tuple of two int numbers that
@@ -5278,6 +5468,9 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5278
5468
 
5279
5469
  Returns:
5280
5470
  Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
5471
+ To see how different pad modes affect the output shape, please refer to
5472
+ :class:`mindspore.nn.Conv2d` for more details.
5473
+
5281
5474
 
5282
5475
  Raises:
5283
5476
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -5421,8 +5614,9 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
5421
5614
  Calculates the error between the predicted value and the target value,
5422
5615
  which has the best of both the loss of l1 and the loss of mse.
5423
5616
 
5424
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the reduction parameter is set to "none"
5425
- then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction. The formula is as follows:
5617
+ Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the `reduction` parameter
5618
+ is set to ``"none"`` then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
5619
+ The formula is as follows:
5426
5620
 
5427
5621
  .. math::
5428
5622
  \ell(x, y) = L = \{l_1,\dots,l_N\}^\top
@@ -5451,21 +5645,25 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
5451
5645
  target (Tensor): Target value, has same dtype and shape as the `input` in common cases.
5452
5646
  However, when the shape of `target` is different from the shape of `input`,
5453
5647
they should be broadcastable to each other.
5454
- reduction (str): Type of reduction to be applied to loss.
5455
- The optional values are ``'mean'`` , ``'sum'`` and ``'none'`` .
5456
- Default: ``'mean'``.
5648
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5649
+ ``'sum'`` . Default: ``'mean'`` .
5650
+
5651
+ - ``'none'``: no reduction will be applied.
5652
+ - ``'mean'``: compute and return the mean of elements in the output.
5653
+ - ``'sum'``: the output elements will be summed.
5654
+
5457
5655
  delta (Union[int, float]): The threshold to change between two type of loss.
5458
5656
  The value must be greater than zero. Default: ``1.0`` .
5459
5657
 
5460
5658
  Returns:
5461
- Tensor or Scalar, if `reduction` is "none", return a Tensor with same shape and dtype as `input`.
5659
+ Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
5462
5660
  Otherwise, a scalar value will be returned.
5463
5661
 
5464
5662
  Raises:
5465
5663
  TypeError: If `input` or `target` is not a Tensor.
5466
5664
  TypeError: If dtype of `delta` is neither float nor int.
5467
5665
  ValueError: If `delta` is less than or equal to 0.
5468
- ValueError: If `reduction` is not one of "none", "mean", "sum".
5666
+ ValueError: If `reduction` is not one of ``"none"``, ``"mean"``, ``"sum"``.
5469
5667
  ValueError: If `input` and `target` have different shapes and cannot be broadcasted to each other.
5470
5668
 
5471
5669
  Supported Platforms:
@@ -5655,15 +5853,20 @@ def bias_add(input_x, bias):
5655
5853
  consistent with the shape of the `input_x` Tensor.
5656
5854
 
5657
5855
  Args:
5658
- input_x (Tensor): The input tensor. The shape can be 2-5 dimensions.
5659
- bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of `input_x`.
5856
+ input_x (Tensor): The input tensor. The shape can be 2-5 dimensions. Supported dtypes:
5857
+
5858
+ - Ascend/CPU: all Number type.
5859
+ - GPU: float16, float32, int8.
5860
+
5861
+ bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
5862
+ `input_x`. It has the same type as `input_x`.
5660
5863
 
5661
5864
  Returns:
5662
5865
  Tensor, with the same shape and data type as `input_x`.
5663
5866
 
5664
5867
  Raises:
5665
5868
  TypeError: If `input_x` or `bias` is not a Tensor.
5666
- TypeError: If dtype of `input_x` or `bias` is inconsistent.
5869
+ TypeError: If the dtypes of `input_x` and `bias` are inconsistent.
5667
5870
  TypeError: If dimension of `input_x` is not in the range [2, 5].
5668
5871
 
5669
5872
  Supported Platforms:
@@ -5718,11 +5921,12 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5718
5921
  the loss function
5719
5922
  will not consider any sample weights, and each sample will be treated as having equal importance
5720
5923
  when calculating the loss.
5721
- reduction (str, optional): Specify the protocol calculation method used to output the results.
5722
- Its value must be one of ``'none'`` , ``'mean'`` or ``'sum'`` ,
5723
- respectively indicate that no calculation method is
5724
- specified, using the average value for calculation, and using summation for calculation, not case-sensitive.
5725
- Default: ``'mean'`` .
5924
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5925
+ ``'sum'`` . Default: ``'mean'`` .
5926
+
5927
+ - ``'none'``: no reduction will be applied.
5928
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
5929
+ - ``'sum'``: the output elements will be summed.
5726
5930
 
5727
5931
  Returns:
5728
5932
  Tensor or Scalar. Returns Tensor that has the same dtype and shape as `logits` if `reduction` is 'none'.
@@ -5731,7 +5935,7 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5731
5935
  Raises:
5732
5936
  TypeError: If `logits`, `labels` or `weight` is not a Tensor.
5733
5937
  TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
5734
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
5938
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
5735
5939
  ValueError: If shape of `labels` is not the same as `logits` or `weight` (if given).
5736
5940
 
5737
5941
  Supported Platforms:
@@ -5754,32 +5958,46 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5754
5958
 
5755
5959
  def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5756
5960
  r"""
5757
- Applies a 3D convolution over an input tensor. The input tensor is typically of shape
5758
- :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
5759
- :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, where :math:`N` is batch size, :math:`C` is channel number,
5760
- :math:`D` is depth, :math:`H, W` is feature height and width respectively.
5761
- the output value of a layer is calculated as:
5961
+ Applies a 3D convolution over an input tensor. The input tensor is typically of
5962
+ shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C`
5963
+ is channel number, :math:`D` is feature depth, :math:`H` is feature height, :math:`W` is feature width.
5964
+
5965
+ The output is calculated based on formula:
5762
5966
 
5763
5967
  .. math::
5764
- \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
5765
- \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
5766
- \operatorname{input}\left(N_{i}, k\right))
5767
-
5768
- where :math:`k` is kernel,
5769
- :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ ,
5770
- :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
5771
- the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
5772
- is a convolution kernel slice with shape
5773
- :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
5774
- where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are
5775
- the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter
5776
- and :math:`\text{X}` is the input tensor.
5777
- The shape of full convolution kernel is
5778
- :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
5779
- where `groups` is the number of groups to split `input` in the channel dimension.
5780
-
5781
- For more details, please refer to the paper `Gradient Based Learning Applied to Document
5782
- Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ .
5968
+
5969
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5970
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5971
+
5972
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5973
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5974
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5975
+
5976
+ Here are the indices' meanings:
5977
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5978
+
5979
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5980
+ output channels, which is also equal to the number of kernels.
5981
+
5982
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5983
+ input channels, which is also equal to the number of channels in the convolutional kernels.
5984
+
5985
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5986
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5987
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5988
+ channel in the :math:`i`-th batch of the input feature map.
5989
+
5990
+ The shape of the convolutional kernel is given by
5991
+ :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
5992
+ where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
5993
+ height and width of the kernel, respectively.
5994
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5995
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
5996
+ \text{kernel_size[1]}, \text{kernel_size[2]})`,
5997
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5998
+
5999
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
6000
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
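And the 3D counterpart of the sketch given for conv2d, again with illustrative shapes (single group, default ``"valid"`` mode, stride 2):

import numpy as np
import mindspore as ms
from mindspore import ops

x = ms.Tensor(np.random.randn(1, 3, 9, 9, 9), ms.float32)   # (N, C_in, D, H, W)
w = ms.Tensor(np.random.randn(6, 3, 3, 3, 3), ms.float32)   # (C_out, C_in / groups, kD, kH, kW)
out = ops.conv3d(x, w, stride=2)                            # "valid", stride 2: floor((9 - 3) / 2) + 1 = 4
print(out.shape)                                            # (1, 6, 4, 4, 4)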
5783
6001
 
5784
6002
  Note:
5785
6003
  1. On Ascend platform, `groups = 1` must be satisfied.
@@ -5790,8 +6008,8 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5790
6008
  weight (Tensor): Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]},
5791
6009
  \text{kernel_size[2]})`, then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]},
5792
6010
\text{kernel_size[1]}, \text{kernel_size[2]})`.
5793
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`. When bias is None, zeros will be used. Default:
5794
- ``None`` .
6011
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
6012
+ When bias is None, zeros will be used. Default: ``None`` .
5795
6013
  stride (Union[int, tuple[int]], optional): The distance of kernel moving,
5796
6014
  it can be an int number that represents
5797
6015
  the depth, height and width of movement or a tuple of three int numbers that
@@ -5799,18 +6017,18 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5799
6017
  pad_mode (str, optional): Specifies padding mode. The optional values are
5800
6018
  ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
5801
6019
 
5802
- - same: Adopts the way of completion. The depth, height and width of the output will be equal to
6020
+ - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be equal to
5803
6021
  the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
5804
6022
left and right directions where possible.
5805
6023
  Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
5806
6024
  If this mode is set, `pad` must be 0.
5807
6025
 
5808
- - valid: Adopts the way of discarding. The possible largest depth, height and width of output
6026
+ - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
5809
6027
  will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
5810
6028
  must be 0.
5811
6029
 
5812
- - pad: Implicit paddings on both sides of the input in depth, height and width. The number of `pad` will
5813
- be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
6030
+ - ``"pad"``: Implicit paddings on both sides of the input in depth, height and width.
6031
+ The number of `pad` will be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
5814
6032
 
5815
6033
  padding (Union[int, tuple[int], list[int]], optional): The pad value to be filled. If `pad` is an integer,
5816
6034
  the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
@@ -5828,36 +6046,36 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5828
6046
  Returns:
5829
6047
  Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
5830
6048
 
5831
- `pad_mode` is 'same':
6049
+ `pad_mode` is ``"same"``:
5832
6050
 
5833
6051
  .. math::
5834
6052
  \begin{array}{ll} \\
5835
- D_{out} \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
5836
- H_{out} \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
5837
- W_{out} \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
6053
+ D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
6054
+ H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
6055
+ W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
5838
6056
  \end{array}
5839
6057
 
5840
- `pad_mode` is 'valid':
6058
+ `pad_mode` is ``"valid"``:
5841
6059
 
5842
6060
  .. math::
5843
6061
  \begin{array}{ll} \\
5844
- D_{out} \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
6062
+ D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
5845
6063
  {\text{stride[0]}} + 1} \right \rfloor \\
5846
- H_{out} \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
6064
+ H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
5847
6065
  {\text{stride[1]}} + 1} \right \rfloor \\
5848
- W_{out} \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
6066
+ W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
5849
6067
  {\text{stride[2]}} + 1} \right \rfloor \\
5850
6068
  \end{array}
5851
6069
 
5852
- `pad_mode` is 'pad':
6070
+ `pad_mode` is ``"pad"``:
5853
6071
 
5854
6072
  .. math::
5855
6073
  \begin{array}{ll} \\
5856
- D_{out} \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
6074
+ D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
5857
6075
  \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
5858
- H_{out} \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
6076
+ H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
5859
6077
  \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
5860
- W_{out} \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
6078
+ W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
5861
6079
  \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
5862
6080
  \end{array}
5863
6081
 
@@ -6082,7 +6300,7 @@ def glu(x, axis=-1):
6082
6300
  TypeError: If `x` is not a Tensor.
6083
6301
 
6084
6302
  Supported Platforms:
6085
- ``Ascend`` ``CPU``
6303
+ ``Ascend`` ``GPU`` ``CPU``
6086
6304
 
6087
6305
  Examples:
6088
6306
  >>> from mindspore import Tensor, ops
@@ -6128,12 +6346,12 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
6128
6346
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6129
6347
  ``'sum'`` . Default: ``'mean'`` .
6130
6348
 
6131
- - ``'none'`` : no reduction will be applied.
6132
- - ``'mean'`` : the sum of the output will be divided by the number of elements in the output.
6133
- - ``'sum'`` : the output will be summed.
6349
+ - ``'none'``: no reduction will be applied.
6350
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
6351
+ - ``'sum'``: the output elements will be summed.
6134
6352
 
6135
6353
  Returns:
6136
- Tensor. If `reduction` is 'none', returns a Tensor with the same shape as `target`.
6354
+ Tensor. If `reduction` is ``'none'``, returns a Tensor with the same shape as `target`.
6137
6355
  Otherwise, it is a scalar.
6138
6356
 
6139
6357
  Raises:
@@ -6200,13 +6418,14 @@ def multilabel_margin_loss(input, target, reduction='mean'):
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
  ``'sum'`` . Default: ``'mean'`` .

- - ``'none'`` : no reduction will be applied.
- - ``'mean'`` : the sum of the output will be divided by the number of elements in the output.
- - ``'sum'`` : the output will be summed.
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Returns:
- - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is "none", its shape
- is :math:`(N)`. Otherwise, a scalar value will be returned.
+ - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss.
+ If `reduction` is ``"none"``, its shape is :math:`(N)`.
+ Otherwise, a scalar value will be returned.

  Raises:
  TypeError: If `input` or `target` is not a Tensor.
@@ -6214,7 +6433,7 @@ def multilabel_margin_loss(input, target, reduction='mean'):
  TypeError: If dtype of `target` is not int32.
  ValueError: If length of shape of `input` is neither 1 nor 2.
  ValueError: If shape of `input` is not the same as `target`.
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

  Supported Platforms:
  ``Ascend`` ``GPU``
@@ -6260,12 +6479,15 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
  input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
  target (Tensor): The label target Tensor which has the same shape as `input`.
  weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: ``None``.
- reduction (str): Specifies which reduction to be applied to the output. It must be one of
- ``'none'`` , ``'mean'`` , and ``'sum'`` , meaning no reduction, reduce mean and sum on output, respectively.
- Default: ``'mean'`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Returns:
- Tensor, the data type is the same as input, if the reduction is 'none',
+ Tensor, the data type is the same as input, if the `reduction` is ``'none'``,
  its shape is :math:`(N)` , otherwise it is zero.

  Raises:
@@ -6409,15 +6631,15 @@ def gelu(input_x, approximate='none'):

  x_dtype = _get_cache_prim(P.DType)()(input_x)
  if x_dtype not in [mstype.float16, mstype.float32, mstype.float64]:
- raise TypeError("For gelu, the input dtype must be float16, float32 or float64, "
- "but got {}.".format(x_dtype))
+ raise TypeError(f"For gelu, the input dtype must be float16, float32 or float64, "
+ f"but got {x_dtype}.")
  if approximate == 'tanh':
  output = _get_cache_prim(P.GeLU)()(input_x)
  else:
- output = _get_cache_prim(P.Sqrt)()(Tensor(2.0))
+ output = _get_cache_prim(P.Sqrt)()(Tensor(2.0, x_dtype))
  output = _get_cache_prim(P.Div)()(input_x, output)
- output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0)
- output = input_x * output * Tensor(0.5)
+ output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0, x_dtype)
+ output = input_x * output * Tensor(0.5, x_dtype)

  return output

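The rewritten else-branch composes the exact erf-based GeLU, 0.5 * x * (1 + erf(x / sqrt(2))), now with dtype-matched scalar tensors so the constants no longer force an upcast. A scalar sanity check of that formula with Python's math module (illustrative value only):

>>> import math
>>> x = 1.0
>>> round(0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0))), 6)
0.841345
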
@@ -6655,8 +6877,12 @@ def mse_loss(input, target, reduction='mean'):
  target (Tensor): The input label. Tensor of any dimension, same shape as the `input` in common cases.
  However, it supports that the shape of `input` is different from the shape of `target`
  and they should be broadcasted to each other.
- reduction (str, optional): Type of reduction to be applied to loss.
- The optional values are ``"mean"`` , ``"none"`` and ``"sum"`` . Default: ``'mean'`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Returns:
  Tensor, loss of type float, the shape is zero if `reduction` is ``'mean'`` or ``'sum'`` ,
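Since `input` and `target` only need to be broadcastable, the squared error is taken element-wise on the broadcast shape and the reduction is applied afterwards. A NumPy sketch of that behaviour with hypothetical shapes (not the MindSpore kernel):

>>> import numpy as np
>>> pred = np.arange(6, dtype=np.float32).reshape(2, 3)   # shape (2, 3)
>>> label = np.zeros((1, 3), dtype=np.float32)            # broadcasts against (2, 3)
>>> sq_err = (pred - label) ** 2                           # element-wise squared error, shape (2, 3)
>>> print(sq_err.shape, round(float(sq_err.mean()), 4), float(sq_err.sum()))
(2, 3) 9.1667 55.0
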
@@ -6759,11 +6985,15 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
  eps (float, optional): Add small value to avoid division by zero. Default: ``1e-06``.
  swap (bool, optional): The distance swap change the negative distance to the distance between positive
  sample and negative sample. Default: ``False`` .
- reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , ``'sum'`` .
- Default: ``'mean'`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Returns:
- Tensor. If `reduction` is "none", its shape is :math:`(N)`. Otherwise, a scalar value will be returned.
+ Tensor. If `reduction` is ``"none"``, its shape is :math:`(N)`. Otherwise, a scalar value will be returned.

  Raises:
  TypeError: If `anchor` or `positive` or 'negative' is not a Tensor.
@@ -6776,7 +7006,7 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
  same time.
  ValueError: If the dimension of input `anchor` or `positive` or `negative` is bigger than or equal to 8.
  ValueError: If shape of `anchor`, `positive` and `negative` cannot broadcast.
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

  Supported Platforms:
  ``GPU``
@@ -6811,7 +7041,7 @@ def linear(x, w, b):
  def _inner_dropout(x, p, training):
  """inner dropout"""
  _dropout = _get_cache_prim(P.Dropout)(1 - p)
- if p > 0. and training:
+ if 0. < p <= 1. and training:
  return _dropout(x)[0]
  return x

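The tightened guard only routes through the Dropout primitive when the drop probability lies in (0, 1] during training; otherwise the input is returned untouched. A NumPy sketch of that control flow under the usual inverted-dropout convention (an illustration with assumed scaling, not the MindSpore primitive):

>>> import numpy as np
>>> def inner_dropout_sketch(x, p, training):
...     if 0. < p <= 1. and training:                  # same guard as the hunk above
...         keep_prob = 1. - p
...         mask = np.random.default_rng(0).random(x.shape) < keep_prob
...         return np.where(mask, x / keep_prob, 0.)   # inverted dropout rescales the kept values
...     return x                                       # p == 0, p out of range, or eval mode: identity
>>> x = np.ones((2, 3), dtype=np.float32)
>>> inner_dropout_sketch(x, p=0.0, training=True) is x
True
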
@@ -6864,10 +7094,11 @@ def _in_projection_packed(q, k, v, w, b, k_is_v, q_is_k):
  return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)


- def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training):
+ def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training, dtype):
  """scaled dot product attention"""
  embed_size = query.shape[-1]
- scaling_factor = Tensor(embed_size, mstype.float32).sqrt().sqrt()
+ embed_size_tensor = scalar_to_tensor_(embed_size, dtype)
+ scaling_factor = embed_size_tensor.sqrt().sqrt()
  query = query / scaling_factor

  if is_causal:
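The scaling factor here is the fourth root of the embedding size, now built in the caller-supplied dtype instead of hard-coded float32. Dividing `query` (and, outside this hunk, presumably `key`) by that factor reproduces the familiar softmax(Q @ K.T / sqrt(d)) @ V scaling. A NumPy sketch of that equivalence with hypothetical shapes and no mask or dropout:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> q, k, v = (rng.standard_normal((2, 4, 8)) for _ in range(3))   # (batch, seq, embed)
>>> def softmax(x):
...     e = np.exp(x - x.max(axis=-1, keepdims=True))
...     return e / e.sum(axis=-1, keepdims=True)
>>> scale = q.shape[-1] ** 0.25                          # sqrt(sqrt(embed_size)), as above
>>> scores_a = (q / scale) @ (k / scale).swapaxes(-1, -2)
>>> scores_b = (q @ k.swapaxes(-1, -2)) / np.sqrt(q.shape[-1])
>>> bool(np.allclose(scores_a, scores_b))
True
>>> (softmax(scores_a) @ v).shape
(2, 4, 8)
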
@@ -6960,7 +7191,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
  out_proj_bias, training=True, key_padding_mask=None, attn_mask=None,
  use_separate_proj_weight=False, q_proj_weight=None, k_proj_weight=None,
  v_proj_weight=None, static_k=None, static_v=None, average_attn_weights=True,
- is_causal=False, k_is_v=False, q_is_k=False):
+ is_causal=False, k_is_v=False, q_is_k=False, dtype=mstype.float32):
  """multi head attetion forward function"""
  is_batched = _check_qkv_shape(query.ndim, key.ndim, value.ndim)
  if key_padding_mask is not None:
@@ -7117,7 +7348,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
  v = v.view((bsz, num_heads, src_len, head_dim))

  attn_output, attn_output_weights = _scaled_dot_product_attention(
- q, k, v, attn_mask, dropout_p, is_causal, training)
+ q, k, v, attn_mask, dropout_p, is_causal, training, dtype)
  attn_output = attn_output.transpose(2, 0, 1, 3).view((bsz * tgt_len, embed_dim))

  attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
@@ -7213,6 +7444,82 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
  return out


+ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_seq_lengths,
+ actual_seq_lengths_kv, deq_scale1, quant_scale1,
+ deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value=1.0, pre_tokens=2147483547,
+ next_tokens=0, input_layout='BSH',
+ num_key_value_heads=0, sparse_mode=0):
+ r"""
+ The interface for full inference.
+ B -- Batch size
+ S -- Sequence length
+ H -- Hidden size
+
+ Note:
+ Only supported on Ascend 910B.
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
+ Inputs:
+ query (Tensor) - The query tensor with data type of float16 or float32.
+ Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+ key (Tensor) - The key tensor with data type of float16 or float32.
+ Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+ value (Tensor) - The value tensor with data type of float16 or float32.
+ Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+ padding_mask (Tensor) - The padding mask tensor with data type of float16 or float32.
+ attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
+ For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
+ actual_seq_lengths (list[int]): Describe actual sequence length of each input with data type of int.
+ actual_seq_lengths_kv (list[int]): Describe actual sequence length of each input with data type of int.
+ deq_scale1 (Tensor)
+ quant_scale1 (Tensor)
+ deq_scale2 (Tensor)
+ quant_scale2 (Tensor)
+ quant_offset2 (Tensor)
+ num_heads (int): The number of heads.
+ scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
+ Muls in the calculation. Default: 1.0.
+ pre_tokens (int): Previous tokens. Default: 2147483547.
+ next_tokens (int): Next tokens. Default: 0.
+ Indicates the number of data blocks in the upper triangle involved in the calculation; the value 0
+ means that the data blocks in the upper triangle are not involved in the calculation.
+ input_layout (str): The data layout of the input qkv, support `(BSH)` and `(BNSD)`. Default: `BSH`.
+ num_key_value_heads (int): Head numbers of key/value which are used in the GQA algorithm.
+ The value 0 means the key and value have the same head number as `num_heads`. Default: 0.
+ sparse_mode (int): Default: 0
+
+
+ Outputs:
+ attention_out (Tensor) - Output tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> from mindspore.ops.function.nn_func import prompt_flash_attention
+ >>> from mindspore import Tensor
+ >>> import numpy as np
+ >>> B = 1
+ >>> N = 16
+ >>> S = 256
+ >>> D = 16
+ >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+ >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+ >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+ >>> out = prompt_flash_attention(query, key, value, None, None, None, None, None, None, None, None,
+ None, N, input_layout='BNSD')
+ >>> print(out[0].shape)
+ (1, 16, 256, 16)
+ """
+
+ pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
+ num_key_value_heads, sparse_mode)
+ return pfa(query, key, value, padding_mask, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, deq_scale1,
+ quant_scale1, deq_scale2, quant_scale2, quant_offset2)
+
+
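The `input_layout` argument switches between the packed `(B, S, H)` layout and the per-head `(B, N, S, D)` layout, where H = N × D under the usual multi-head convention. A NumPy sketch of converting between the two (hypothetical sizes matching the docstring example; the operator itself handles layout internally, so this is only an illustration of the shapes):

>>> import numpy as np
>>> B, N, S, D = 1, 16, 256, 16                            # hypothetical sizes; H = N * D = 256
>>> bsh = np.zeros((B, S, N * D), dtype=np.float16)        # packed 'BSH' layout
>>> bnsd = bsh.reshape(B, S, N, D).transpose(0, 2, 1, 3)   # split heads -> 'BNSD' layout
>>> bnsd.shape
(1, 16, 256, 16)
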
  __all__ = [
  'adaptive_avg_pool1d',
  'adaptive_avg_pool2d',
@@ -7260,6 +7567,7 @@ __all__ = [
  'softsign',
  'softshrink',
  'soft_shrink',
+ 'softplus',
  'selu',
  'silu',
  'soft_margin_loss',