mindspore 2.1.0__cp38-none-any.whl → 2.2.0__cp38-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (539)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_akg/akg/build_module.py +5 -6
  4. mindspore/_akg/akg/composite/build_module.py +49 -16
  5. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  6. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  7. mindspore/_akg/akg/tvm/api.py +4 -3
  8. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  9. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  10. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  11. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  12. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  13. mindspore/_akg/akg/tvm/build_module.py +16 -1
  14. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  15. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  16. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  17. mindspore/_akg/akg/tvm/module.py +1 -2
  18. mindspore/_akg/akg/tvm/stmt.py +2 -2
  19. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  20. mindspore/_akg/akg/utils/kernel_exec.py +58 -260
  21. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  22. mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
  23. mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
  24. mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
  25. mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
  26. mindspore/_check_jit_forbidden_api.py +3 -1
  27. mindspore/_checkparam.py +26 -32
  28. mindspore/_extends/graph_kernel/__init__.py +0 -1
  29. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  30. mindspore/_extends/graph_kernel/splitter.py +1 -9
  31. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
  32. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
  33. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  34. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
  35. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +4 -4
  36. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  37. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  38. mindspore/_extends/parse/__init__.py +12 -15
  39. mindspore/_extends/parse/namespace.py +7 -33
  40. mindspore/_extends/parse/parser.py +61 -71
  41. mindspore/_extends/parse/resources.py +1 -1
  42. mindspore/_extends/parse/standard_method.py +72 -95
  43. mindspore/_extends/parse/trope.py +1 -1
  44. mindspore/_extends/remote/kernel_build_server.py +24 -7
  45. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  46. mindspore/_install_custom.py +43 -0
  47. mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
  48. mindspore/amp.py +47 -11
  49. mindspore/bin/cache_admin +0 -0
  50. mindspore/bin/cache_server +0 -0
  51. mindspore/boost/boost.py +1 -8
  52. mindspore/boost/boost_cell_wrapper.py +3 -2
  53. mindspore/boost/grad_accumulation.py +1 -1
  54. mindspore/boost/group_loss_scale_manager.py +8 -7
  55. mindspore/common/__init__.py +5 -3
  56. mindspore/common/_jit_fallback_utils.py +6 -0
  57. mindspore/common/_register_for_adapter.py +2 -0
  58. mindspore/common/_register_for_tensor.py +2 -2
  59. mindspore/common/_stub_tensor.py +13 -0
  60. mindspore/common/_utils.py +13 -0
  61. mindspore/common/api.py +173 -258
  62. mindspore/common/auto_dynamic_shape.py +498 -0
  63. mindspore/common/dtype.py +18 -11
  64. mindspore/common/dump.py +6 -4
  65. mindspore/common/initializer.py +14 -14
  66. mindspore/common/jit_config.py +33 -15
  67. mindspore/common/lazy_inline.py +126 -7
  68. mindspore/common/mindir_util.py +101 -0
  69. mindspore/common/parameter.py +51 -41
  70. mindspore/common/seed.py +4 -4
  71. mindspore/common/sparse_tensor.py +13 -14
  72. mindspore/common/tensor.py +240 -145
  73. mindspore/communication/__init__.py +7 -4
  74. mindspore/communication/_comm_helper.py +83 -4
  75. mindspore/communication/management.py +152 -84
  76. mindspore/config/op_info.config +13 -2
  77. mindspore/config/super_bar_config.json +4 -2
  78. mindspore/context.py +143 -59
  79. mindspore/dataset/__init__.py +5 -5
  80. mindspore/dataset/audio/__init__.py +2 -2
  81. mindspore/dataset/audio/transforms.py +52 -52
  82. mindspore/dataset/callback/ds_callback.py +16 -2
  83. mindspore/dataset/core/config.py +68 -51
  84. mindspore/dataset/engine/cache_client.py +28 -5
  85. mindspore/dataset/engine/datasets.py +250 -112
  86. mindspore/dataset/engine/datasets_audio.py +43 -211
  87. mindspore/dataset/engine/datasets_standard_format.py +11 -35
  88. mindspore/dataset/engine/datasets_text.py +43 -67
  89. mindspore/dataset/engine/datasets_user_defined.py +86 -100
  90. mindspore/dataset/engine/datasets_vision.py +219 -1029
  91. mindspore/dataset/engine/iterators.py +11 -4
  92. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
  93. mindspore/dataset/engine/obs/util.py +3 -0
  94. mindspore/dataset/engine/samplers.py +1 -1
  95. mindspore/dataset/engine/validators.py +19 -5
  96. mindspore/dataset/text/__init__.py +3 -3
  97. mindspore/dataset/text/transforms.py +101 -127
  98. mindspore/dataset/text/utils.py +205 -138
  99. mindspore/dataset/transforms/__init__.py +1 -1
  100. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  101. mindspore/dataset/transforms/transforms.py +95 -40
  102. mindspore/dataset/utils/browse_dataset.py +8 -2
  103. mindspore/dataset/utils/line_reader.py +17 -19
  104. mindspore/dataset/vision/__init__.py +3 -3
  105. mindspore/dataset/vision/c_transforms.py +6 -3
  106. mindspore/dataset/vision/transforms.py +409 -287
  107. mindspore/dataset/vision/utils.py +13 -14
  108. mindspore/dataset/vision/validators.py +11 -1
  109. mindspore/experimental/map_parameter.py +14 -0
  110. mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
  111. mindspore/{nn/optim_ex → experimental/optim}/adam.py +59 -66
  112. mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
  113. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  114. mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
  115. mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
  116. mindspore/gen_ops.py +273 -0
  117. mindspore/include/OWNERS +0 -1
  118. mindspore/include/api/data_type.h +2 -1
  119. mindspore/include/api/graph.h +0 -15
  120. mindspore/include/api/kernel.h +2 -0
  121. mindspore/include/api/kernel_api.h +37 -12
  122. mindspore/include/api/model.h +0 -14
  123. mindspore/include/api/types.h +37 -4
  124. mindspore/include/c_api/ms/abstract.h +67 -0
  125. mindspore/include/c_api/ms/attribute.h +197 -0
  126. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  127. mindspore/include/c_api/ms/base/macros.h +32 -0
  128. mindspore/include/c_api/ms/base/status.h +33 -0
  129. mindspore/include/c_api/ms/base/types.h +282 -0
  130. mindspore/include/c_api/ms/context.h +102 -0
  131. mindspore/include/c_api/ms/graph.h +160 -0
  132. mindspore/include/c_api/ms/node.h +606 -0
  133. mindspore/include/c_api/ms/tensor.h +161 -0
  134. mindspore/include/c_api/ms/value.h +84 -0
  135. mindspore/include/dataset/constants.h +6 -5
  136. mindspore/include/dataset/execute.h +23 -13
  137. mindspore/include/dataset/text.h +26 -26
  138. mindspore/include/dataset/transforms.h +13 -13
  139. mindspore/include/dataset/vision.h +60 -60
  140. mindspore/include/dataset/vision_ascend.h +5 -6
  141. mindspore/include/dataset/vision_lite.h +17 -17
  142. mindspore/include/mindapi/base/type_id.h +1 -0
  143. mindspore/include/mindapi/base/types.h +1 -0
  144. mindspore/lib/libdnnl.so.2 +0 -0
  145. mindspore/lib/libjemalloc.so.2 +0 -0
  146. mindspore/lib/libmindspore.so +0 -0
  147. mindspore/lib/libmindspore_backend.so +0 -0
  148. mindspore/lib/libmindspore_common.so +0 -0
  149. mindspore/lib/libmindspore_core.so +0 -0
  150. mindspore/lib/libmindspore_glog.so.0 +0 -0
  151. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  152. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  153. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  154. mindspore/lib/libmindspore_shared_lib.so +0 -0
  155. mindspore/lib/libnnacl.so +0 -0
  156. mindspore/lib/libopencv_core.so.4.5 +0 -0
  157. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  158. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  159. mindspore/lib/libps_cache.so +0 -0
  160. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  161. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  162. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
  163. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  164. mindspore/lib/plugin/ascend/libakg.so +0 -0
  165. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  166. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  167. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  168. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  169. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  170. mindspore/lib/plugin/cpu/libakg.so +0 -0
  171. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  172. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  173. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  174. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  175. mindspore/nn/__init__.py +0 -2
  176. mindspore/nn/cell.py +316 -74
  177. mindspore/nn/dynamic_lr.py +21 -21
  178. mindspore/nn/layer/activation.py +21 -28
  179. mindspore/nn/layer/basic.py +15 -13
  180. mindspore/nn/layer/channel_shuffle.py +1 -1
  181. mindspore/nn/layer/container.py +271 -9
  182. mindspore/nn/layer/conv.py +310 -207
  183. mindspore/nn/layer/dense.py +8 -5
  184. mindspore/nn/layer/embedding.py +33 -27
  185. mindspore/nn/layer/flash_attention.py +82 -41
  186. mindspore/nn/layer/image.py +8 -6
  187. mindspore/nn/layer/math.py +13 -18
  188. mindspore/nn/layer/normalization.py +107 -66
  189. mindspore/nn/layer/padding.py +1 -1
  190. mindspore/nn/layer/pooling.py +131 -109
  191. mindspore/nn/layer/rnn_cells.py +22 -17
  192. mindspore/nn/layer/rnns.py +13 -16
  193. mindspore/nn/layer/thor_layer.py +1 -1
  194. mindspore/nn/layer/transformer.py +221 -154
  195. mindspore/nn/learning_rate_schedule.py +9 -1
  196. mindspore/nn/loss/loss.py +235 -174
  197. mindspore/nn/optim/ada_grad.py +2 -1
  198. mindspore/nn/optim/adadelta.py +1 -0
  199. mindspore/nn/optim/adafactor.py +2 -1
  200. mindspore/nn/optim/adam.py +7 -4
  201. mindspore/nn/optim/adamax.py +3 -2
  202. mindspore/nn/optim/adasum.py +2 -2
  203. mindspore/nn/optim/asgd.py +2 -3
  204. mindspore/nn/optim/ftrl.py +6 -5
  205. mindspore/nn/optim/lamb.py +7 -4
  206. mindspore/nn/optim/lars.py +1 -1
  207. mindspore/nn/optim/lazyadam.py +5 -3
  208. mindspore/nn/optim/momentum.py +2 -1
  209. mindspore/nn/optim/optimizer.py +53 -4
  210. mindspore/nn/optim/proximal_ada_grad.py +3 -4
  211. mindspore/nn/optim/rmsprop.py +4 -3
  212. mindspore/nn/optim/rprop.py +23 -12
  213. mindspore/nn/optim/sgd.py +26 -11
  214. mindspore/nn/optim/thor.py +9 -7
  215. mindspore/nn/probability/bijector/bijector.py +5 -5
  216. mindspore/nn/probability/bijector/power_transform.py +27 -27
  217. mindspore/nn/probability/bijector/softplus.py +3 -3
  218. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
  219. mindspore/nn/probability/distribution/bernoulli.py +5 -5
  220. mindspore/nn/probability/distribution/beta.py +3 -3
  221. mindspore/nn/probability/distribution/categorical.py +7 -7
  222. mindspore/nn/probability/distribution/cauchy.py +0 -1
  223. mindspore/nn/probability/distribution/distribution.py +3 -3
  224. mindspore/nn/probability/distribution/gamma.py +3 -3
  225. mindspore/nn/probability/distribution/geometric.py +4 -4
  226. mindspore/nn/probability/distribution/gumbel.py +4 -4
  227. mindspore/nn/probability/distribution/log_normal.py +2 -2
  228. mindspore/nn/probability/distribution/logistic.py +2 -2
  229. mindspore/nn/probability/distribution/poisson.py +4 -4
  230. mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
  231. mindspore/nn/probability/distribution/uniform.py +6 -6
  232. mindspore/nn/wrap/cell_wrapper.py +78 -34
  233. mindspore/nn/wrap/grad_reducer.py +8 -5
  234. mindspore/nn/wrap/loss_scale.py +105 -42
  235. mindspore/numpy/array_creations.py +1 -2
  236. mindspore/numpy/array_ops.py +3 -2
  237. mindspore/offline_debug/convert_async.py +2 -2
  238. mindspore/ops/_grad_experimental/__init__.py +0 -5
  239. mindspore/ops/_grad_experimental/grad_array_ops.py +1 -2
  240. mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
  241. mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
  242. mindspore/ops/_grad_experimental/grad_implementations.py +10 -0
  243. mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
  244. mindspore/ops/_grad_experimental/grad_math_ops.py +0 -181
  245. mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
  246. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  247. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
  248. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
  249. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
  250. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
  251. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
  252. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
  253. mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
  254. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  255. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  256. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  257. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  258. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  259. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  260. mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
  261. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  262. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  263. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  264. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  265. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  266. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  267. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  268. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
  269. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
  270. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  271. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  272. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  273. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  274. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  275. mindspore/ops/_op_impl/tbe/__init__.py +4 -4
  276. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  277. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  278. mindspore/ops/_primitive_cache.py +1 -1
  279. mindspore/ops/_tracefunc.py +45 -13
  280. mindspore/ops/_utils/utils.py +4 -1
  281. mindspore/ops/_vmap/vmap_array_ops.py +3 -3
  282. mindspore/ops/_vmap/vmap_base.py +3 -3
  283. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  284. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  285. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  286. mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
  287. mindspore/ops/arg_dtype_cast.py +54 -0
  288. mindspore/ops/composite/base.py +37 -10
  289. mindspore/ops/composite/math_ops.py +5 -4
  290. mindspore/ops/composite/multitype_ops/_compile_utils.py +273 -72
  291. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
  292. mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
  293. mindspore/ops/composite/multitype_ops/getitem_impl.py +40 -2
  294. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  295. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  296. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  297. mindspore/ops/deprecated.py +304 -0
  298. mindspore/ops/function/__init__.py +4 -1
  299. mindspore/ops/function/array_func.py +167 -189
  300. mindspore/ops/function/clip_func.py +81 -13
  301. mindspore/ops/function/debug_func.py +1 -1
  302. mindspore/ops/function/grad/grad_func.py +18 -8
  303. mindspore/ops/function/image_func.py +10 -4
  304. mindspore/ops/function/linalg_func.py +5 -5
  305. mindspore/ops/function/math_func.py +575 -386
  306. mindspore/ops/function/nn_func.py +470 -251
  307. mindspore/ops/function/random_func.py +86 -56
  308. mindspore/ops/function/sparse_func.py +1 -1
  309. mindspore/ops/function/sparse_unary_func.py +14 -12
  310. mindspore/ops/function/vmap_func.py +6 -5
  311. mindspore/ops/functional.py +15 -10
  312. mindspore/ops/op_info_register.py +235 -19
  313. mindspore/ops/operations/__init__.py +25 -17
  314. mindspore/ops/operations/_grad_ops.py +52 -7
  315. mindspore/ops/operations/_inner_ops.py +213 -12
  316. mindspore/ops/operations/_quant_ops.py +4 -8
  317. mindspore/ops/operations/_sequence_ops.py +42 -0
  318. mindspore/ops/operations/array_ops.py +64 -280
  319. mindspore/ops/operations/comm_ops.py +105 -57
  320. mindspore/ops/operations/custom_ops.py +10 -3
  321. mindspore/ops/operations/debug_ops.py +8 -4
  322. mindspore/ops/operations/image_ops.py +18 -12
  323. mindspore/ops/operations/math_ops.py +185 -138
  324. mindspore/ops/operations/nn_ops.py +716 -492
  325. mindspore/ops/operations/other_ops.py +0 -22
  326. mindspore/ops/operations/random_ops.py +53 -111
  327. mindspore/ops/operations/sparse_ops.py +3 -1
  328. mindspore/ops/primitive.py +24 -18
  329. mindspore/parallel/_auto_parallel_context.py +68 -8
  330. mindspore/parallel/_cost_model_context.py +2 -2
  331. mindspore/parallel/_offload_context.py +17 -3
  332. mindspore/parallel/_parallel_serialization.py +2 -2
  333. mindspore/parallel/_ps_context.py +12 -0
  334. mindspore/parallel/_tensor.py +14 -12
  335. mindspore/parallel/_transformer/layers.py +5 -3
  336. mindspore/parallel/_transformer/loss.py +1 -0
  337. mindspore/parallel/_transformer/moe.py +2 -2
  338. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  339. mindspore/parallel/_transformer/transformer.py +23 -3
  340. mindspore/parallel/_utils.py +11 -7
  341. mindspore/parallel/algo_parameter_config.py +85 -5
  342. mindspore/parallel/checkpoint_transform.py +6 -10
  343. mindspore/parallel/shard.py +4 -4
  344. mindspore/profiler/common/struct_type.py +3 -3
  345. mindspore/profiler/common/util.py +3 -2
  346. mindspore/profiler/envprofiling.py +1 -1
  347. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  348. mindspore/profiler/parser/ascend_flops_generator.py +2 -2
  349. mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
  350. mindspore/profiler/parser/ascend_hccl_generator.py +17 -12
  351. mindspore/profiler/parser/ascend_msprof_exporter.py +104 -252
  352. mindspore/profiler/parser/ascend_msprof_generator.py +8 -8
  353. mindspore/profiler/parser/ascend_op_generator.py +5 -5
  354. mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
  355. mindspore/profiler/parser/ascend_timeline_generator.py +9 -6
  356. mindspore/profiler/parser/base_timeline_generator.py +9 -7
  357. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +14 -10
  358. mindspore/profiler/parser/flops_parser.py +15 -11
  359. mindspore/profiler/parser/framework_parser.py +37 -21
  360. mindspore/profiler/parser/hccl_parser.py +16 -12
  361. mindspore/profiler/parser/integrator.py +22 -11
  362. mindspore/profiler/parser/memory_usage_parser.py +2 -2
  363. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  364. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  365. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  366. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  367. mindspore/profiler/parser/optime_parser.py +1 -1
  368. mindspore/profiler/parser/profiler_info.py +2 -2
  369. mindspore/profiler/parser/step_trace_parser.py +11 -14
  370. mindspore/profiler/profiling.py +139 -71
  371. mindspore/rewrite/api/node.py +102 -19
  372. mindspore/rewrite/api/node_type.py +5 -1
  373. mindspore/rewrite/api/scoped_value.py +9 -17
  374. mindspore/rewrite/api/symbol_tree.py +131 -47
  375. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  376. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  377. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  378. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
  379. mindspore/rewrite/common/rewrite_elog.py +5 -1
  380. mindspore/rewrite/namer.py +33 -24
  381. mindspore/rewrite/namespace.py +14 -5
  382. mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
  383. mindspore/rewrite/node/call_function.py +79 -0
  384. mindspore/rewrite/node/cell_container.py +135 -0
  385. mindspore/rewrite/node/control_flow.py +88 -0
  386. mindspore/rewrite/{node.py → node/node.py} +273 -234
  387. mindspore/rewrite/node/node_manager.py +254 -0
  388. mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
  389. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  390. mindspore/rewrite/parsers/assign_parser.py +216 -221
  391. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  392. mindspore/rewrite/parsers/class_def_parser.py +174 -113
  393. mindspore/rewrite/parsers/constant_parser.py +9 -6
  394. mindspore/rewrite/parsers/container_parser.py +9 -7
  395. mindspore/rewrite/parsers/for_parser.py +36 -15
  396. mindspore/rewrite/parsers/function_def_parser.py +24 -16
  397. mindspore/rewrite/parsers/if_parser.py +28 -24
  398. mindspore/rewrite/parsers/module_parser.py +196 -25
  399. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  400. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  401. mindspore/rewrite/parsers/return_parser.py +6 -6
  402. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  403. mindspore/rewrite/sparsify/utils.py +1 -1
  404. mindspore/rewrite/symbol_tree.py +525 -577
  405. mindspore/rewrite/symbol_tree_builder.py +9 -193
  406. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  407. mindspore/run_check/_check_version.py +2 -2
  408. mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
  409. mindspore/safeguard/rewrite_obfuscation.py +517 -0
  410. mindspore/scipy/linalg.py +1 -1
  411. mindspore/scipy/optimize/minimize.py +7 -3
  412. mindspore/train/_utils.py +7 -3
  413. mindspore/train/amp.py +323 -123
  414. mindspore/train/anf_ir_pb2.py +14 -2
  415. mindspore/train/callback/_backup_and_restore.py +2 -12
  416. mindspore/train/callback/_callback.py +29 -4
  417. mindspore/train/callback/_checkpoint.py +23 -8
  418. mindspore/train/callback/_early_stop.py +2 -2
  419. mindspore/train/callback/_landscape.py +4 -4
  420. mindspore/train/callback/_loss_monitor.py +2 -2
  421. mindspore/train/callback/_on_request_exit.py +2 -2
  422. mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
  423. mindspore/train/callback/_summary_collector.py +14 -7
  424. mindspore/train/callback/_time_monitor.py +58 -5
  425. mindspore/train/data_sink.py +5 -11
  426. mindspore/train/dataset_helper.py +83 -57
  427. mindspore/train/loss_scale_manager.py +2 -2
  428. mindspore/train/metrics/__init__.py +3 -3
  429. mindspore/train/metrics/cosine_similarity.py +1 -1
  430. mindspore/train/metrics/hausdorff_distance.py +3 -2
  431. mindspore/train/metrics/mean_surface_distance.py +3 -2
  432. mindspore/train/metrics/metric.py +39 -19
  433. mindspore/train/metrics/roc.py +2 -2
  434. mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
  435. mindspore/train/mind_ir_pb2.py +85 -36
  436. mindspore/train/model.py +185 -45
  437. mindspore/train/serialization.py +390 -150
  438. mindspore/train/summary/_writer_pool.py +3 -2
  439. mindspore/train/summary/summary_record.py +14 -10
  440. mindspore/train/train_thor/convert_utils.py +3 -3
  441. mindspore/train/train_thor/dataset_helper.py +1 -1
  442. mindspore/version.py +1 -1
  443. {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/METADATA +6 -7
  444. {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/RECORD +447 -507
  445. {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
  446. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  447. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  448. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  449. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  450. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  451. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  452. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  453. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  454. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  455. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  456. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  457. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  458. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  459. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  460. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  461. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  462. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  463. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  464. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  465. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  466. mindspore/_extends/graph_kernel/expander.py +0 -80
  467. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
  468. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  469. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  470. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  471. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  472. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  473. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  474. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  475. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  476. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  477. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  478. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  479. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  480. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  481. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  482. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  483. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  484. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  485. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  486. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  487. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  488. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  489. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  490. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  491. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  492. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  493. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  494. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  495. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  496. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  497. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  498. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  499. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  500. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  501. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  502. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  503. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  504. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  505. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  506. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  507. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  508. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  509. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  510. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  511. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  512. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  513. mindspore/dataset/datapreprocess/__init__.py +0 -20
  514. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  515. mindspore/include/api/net.h +0 -142
  516. mindspore/nn/lr_scheduler.py +0 -262
  517. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
  518. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
  519. mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
  520. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  521. mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
  522. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
  523. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
  524. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
  525. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  526. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  527. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
  528. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
  529. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  530. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  531. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
  532. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
  533. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
  534. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
  535. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
  536. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  537. mindspore/rewrite/node_visitor.py +0 -44
  538. {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
  539. {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
@@ -27,7 +27,7 @@ from mindspore.ops.operations import nn_ops as NN_OPS
  from mindspore.ops.operations import _sequence_ops as seq
  import mindspore.common.dtype as mstype
  from mindspore.ops.function.math_func import logsumexp
- from mindspore.ops.function.random_func import _get_seed
+ from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
  from mindspore.common.tensor import Tensor
  from mindspore._c_expression import Tensor as Tensor_
  from mindspore.ops._primitive_cache import _get_cache_prim
@@ -40,6 +40,7 @@ from mindspore.ops.operations.nn_ops import ChannelShuffle
  from mindspore.ops.operations.nn_ops import TripletMarginLoss
  from mindspore.ops.operations._inner_ops import SiLU
  from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
+ from mindspore.common.api import _function_forbid_reuse

  slice_ = P.Slice()
  fast_gelu_ = P.FastGeLU()
@@ -232,7 +233,7 @@ def adaptive_avg_pool3d(input, output_size):
  def _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad):
  """Checks the type of avgpool1d input"""
  validator.check_value_type('kernel_size', kernel_size, [int], 'avg_pool1d')
- validator.check_value_type('stride', stride, [int], 'avg_pool1d')
+ validator.check_value_type('stride', stride, (int, tuple), 'avg_pool1d')
  validator.check_value_type('ceil_mode', ceil_mode, bool, 'avg_pool1d')
  validator.check_value_type('count_include_pad', count_include_pad, bool, 'avg_pool1d')
  validator.check_int(kernel_size, 1, validator.GE, "kernel_size", 'avg_pool1d')
@@ -263,12 +264,10 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  Args:
  input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
  kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
- stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents the height and
- width of movement are both strides, or a tuple of two int numbers that represent height and width of
- movement respectively. Default: ``1`` .
- padding (Union(int, tuple[int])): The pad value to be filled. If `padding` is an integer, the paddings of left
- and right are the same, equal to pad. If `padding` is a tuple of `2` integers, the padding of left and right
- equal to `padding[0]` and `padding[1]` correspondingly. Default: ``0`` .
+ stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
+ number or a tuple of one int number. Default: ``1`` .
+ padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
+ or a tuple of one integer. Default: ``0`` .
  ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
  count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .

@@ -300,20 +299,25 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  raise TypeError("For avg_pool1d, the input input_x must be tensor")

  if len(input_x.shape) != 3:
- raise ValueError("For avg_pool1d, input must have 3 dim, but got {}.".format(len(input_x.shape)))
+ raise ValueError(f"For avg_pool1d, input must have 3 dim, but got {len(input_x.shape)}.")

  _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad)
  if isinstance(padding, int):
  check_non_negative_int(padding, 'padding', 'avg_pool1d')
  padding = (0, 0, 0, 0, padding, padding)
  elif isinstance(padding, tuple):
- if len(padding) != 2:
- raise ValueError("For avg_pool1d, padding should be int or tuple of length 2.")
+ if len(padding) != 1:
+ raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
  for item in padding:
  check_non_negative_int(item, 'padding', 'avg_pool1d')
- padding = (0, 0, 0, 0, padding[0], padding[1])
+ padding = (0, 0, 0, 0, padding[0], padding[0])
  else:
- raise TypeError("For avg_pool1d, padding should be int or tuple of length 2.")
+ raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")
+
+ if isinstance(stride, tuple):
+ if len(stride) != 1:
+ raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
+ stride = stride[0]

  expand_op = _get_cache_prim(P.ExpandDims)()
  squeeze_op = _get_cache_prim(P.Squeeze)((2, 3))
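A minimal usage sketch of the relaxed avg_pool1d contract shown above, where `stride` and `padding` may be an int or a one-element tuple (tensor values are illustrative; the signature is the one in this hunk's header):
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.random.randn(1, 3, 6), mindspore.float32)            # (N, C_in, L_in)
>>> out = ops.avg_pool1d(x, kernel_size=2, stride=(2,), padding=(0,))  # 1-tuples now accepted
>>> out.shape
(1, 3, 3)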
@@ -419,7 +423,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
  count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
  divisor_override (int): If specified, it will be used as divisor in the averaging calculation, otherwise
- `kernel_size` will be used. Default: ``0`` .
+ `kernel_size` will be used. Default: ``0``, which means not specified.

  Returns:
  Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
@@ -456,7 +460,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  raise TypeError("For avg_pool2d, the input input_x must be tensor")

  if len(input_x.shape) != 4:
- raise ValueError("For avg_pool2d, input must have 4 dim, but got {}.".format(len(input_x.shape)))
+ raise ValueError(f"For avg_pool2d, input must have 4 dim, but got {len(input_x.shape)}.")

  kernel_size = _check_avgpool_2d_kernel_size(kernel_size)
  stride = _check_avgpool_2d_stride(stride)
@@ -527,7 +531,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  count_include_pad (bool, optional): If ``True`` , averaging calculation
  will include the zero-padding. Default: ``True`` .
  divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
- otherwise `kernel_size` will be used. Default: ``0`` .
+ otherwise `kernel_size` will be used. Default: ``0`` , which means not specified.

  Returns:
  Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type with `input_x`.
@@ -560,7 +564,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
  raise TypeError("For avg_pool3d, the input input_x must be tensor")

  if len(input_x.shape) != 5:
- raise ValueError("For avg_pool3d, input must have 5 dim, but got {}.".format(len(input_x.shape)))
+ raise ValueError(f"For avg_pool3d, input must have 5 dim, but got {len(input_x.shape)}.")

  _check_avg_pool3d_padding(padding)

@@ -637,21 +641,21 @@ def adaptive_max_pool1d(input, output_size):
  x_dtype = _get_cache_prim(P.DType)()(input)

  if len(x_in_shape) != 3:
- raise ValueError("For adaptive_max_pool1d input must have 3 dim, but got {}.".format(len(x_in_shape)))
+ raise ValueError(f"For adaptive_max_pool1d input must have 3 dim, but got {len(x_in_shape)}.")
  if x_in_shape[2] < output_size:
- raise ValueError("For adaptive_max_pool1d input's last dimension must be greater or equal to "
- "output size {}, but got {}.".format(output_size, x_in_shape[2]))
+ raise ValueError(f"For adaptive_max_pool1d input's last dimension must be greater or equal to "
+ f"output size {output_size}, but got {x_in_shape[2]}.")
  if x_in_shape[2] % output_size != 0:
- raise ValueError("For adaptive_max_pool1d input's last dimension must be divisible by "
- "output size {}, but got {}.".format(output_size, x_in_shape[2]))
+ raise ValueError(f"For adaptive_max_pool1d input's last dimension must be divisible by "
+ f"output size {output_size}, but got {x_in_shape[2]}.")
  if is_ascend_backend():
  if x_dtype not in [mstype.float16]:
- raise TypeError("For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
- "but got {}.".format(x_dtype))
+ raise TypeError(f"For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
+ f"but got {x_dtype}.")
  else:
  if x_dtype not in [mstype.float16, mstype.float32]:
- raise TypeError("For adaptive_max_pool1d, the input dtype must be float16 or float32, "
- "but got {}.".format(x_dtype))
+ raise TypeError(f"For adaptive_max_pool1d, the input dtype must be float16 or float32, "
+ f"but got {x_dtype}.")

  expand_ = _get_cache_prim(P.ExpandDims)()
  squeeze_ = _get_cache_prim(P.Squeeze)(2)
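As the checks above spell out, adaptive_max_pool1d expects a 3-D input whose last dimension is at least `output_size` and divisible by it (and float16 on Ascend). A short sketch with illustrative values:
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.random.randn(1, 3, 8), mindspore.float32)   # last dim 8 is divisible by 4
>>> ops.adaptive_max_pool1d(x, output_size=4).shape
(1, 3, 4)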
@@ -1147,7 +1151,7 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
  return out


- def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
+ def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None, reduction='mean'):
  r"""
  Adds sigmoid activation function to input `logits`, and uses the given logits to compute binary cross entropy
  between the logits and the label.
@@ -1177,7 +1181,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio

  This operator will multiply the output by the corresponding weight.
  The tensor :math:`weight` assigns different weights to each piece of data in the batch,
- and the tensor :math:`pos_weight` adds corresponding weights to the positive examples of each category.
+ and the tensor :math:`pos\_weight` adds corresponding weights to the positive examples of each category.

  In addition, it can trade off recall and precision by adding weights to positive examples.
  In the case of multi-label classification the loss can be described as:
@@ -1196,17 +1200,21 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
  logits (Tensor): Input logits. Data type must be float16 or float32.
  label (Tensor): Ground truth label, has the same shape as `logits`.
  Data type must be float16 or float32.
- weight (Tensor): A rescaling weight applied to the loss of each batch element. It can be
+ weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. It can be
  broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
- pos_weight (Tensor): A weight of positive examples. Must be a vector with length equal to the
+ Default: ``None``, `weight` is a Tensor whose value is ``1``.
+ pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
  number of classes. It can be broadcast to a tensor with shape of `logits`.
- Data type must be float16 or float32.
- reduction (str): Type of reduction to be applied to loss. The optional values
- are ``'mean'`` , ``'sum'`` , and ``'none'`` ,
- not case sensitive. If ``'none'`` , do not perform reduction. Default: ``'mean'`` .
+ Data type must be float16 or float32. Default: ``None``, `pos_weight` is a Tensor whose value is ``1``.
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Returns:
- Tensor or Scalar, if `reduction` is 'none', it's a tensor with the same shape and type as input `logits`.
+ Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
  Otherwise, the output is a scalar.

  Raises:
@@ -1214,7 +1222,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
  TypeError: If data type of input `logits`, `label`, `weight`, `pos_weight` is neither float16 nor float32.
  TypeError: If data type of input `reduction` is not string.
  ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.

  Supported Platforms:
  ``Ascend`` ``GPU`` ``CPU``
@@ -1232,10 +1240,15 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
  0.3463612
  """

+ if weight is None:
+ weight = ops.ones_like(logits)
+ if pos_weight is None:
+ pos_weight = ops.ones_like(logits)
  bce_with_logits_loss_op = _get_cache_prim(NN_OPS.BCEWithLogitsLoss)(reduction)
  return bce_with_logits_loss_op(logits, label, weight, pos_weight)


+ @_function_forbid_reuse
  def dropout(input, p=0.5, training=True, seed=None):
  r"""
  During training, randomly zeroes some of the elements of the input tensor
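With the defaults added in the binary_cross_entropy_with_logits hunk above, omitting `weight` and `pos_weight` is equivalent to passing explicit all-ones tensors, since both fall back to ops.ones_like(logits) internally. A sketch with illustrative values:
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]), mindspore.float32)
>>> label = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]), mindspore.float32)
>>> out_default = ops.binary_cross_entropy_with_logits(logits, label)
>>> out_explicit = ops.binary_cross_entropy_with_logits(logits, label, ops.ones_like(logits), ops.ones_like(logits))
>>> assert (out_default == out_explicit).all()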
@@ -1275,7 +1288,9 @@ def dropout(input, p=0.5, training=True, seed=None):
  return input
  keep_prob = 1 - p
  seed0, seed1 = _get_seed(seed, "dropout")
- out, _ = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)(input)
+ dropout_op = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)
+ dropout_op = _set_prim_op_user_data(dropout_op, "random_cache", False)
+ out, _ = dropout_op(input)
  return out

@@ -1820,7 +1835,7 @@ def kl_div(logits, labels, reduction='mean'):
  Its value must be one of ``'none'`` , ``'mean'`` , ``'batchmean'`` or ``'sum'`` . Default: ``'mean'`` .

  Returns:
- Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `logits`.
+ Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
  Otherwise, it is a scalar.

  Raises:
@@ -2220,7 +2235,9 @@ def interpolate(input,
  One and only one of size and scale_factor can be set to None. Default: ``None`` .
  mode (str): The sampling algorithm.
  One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
- 'area', 'nearest-exact'(3D and 4D). Default: ``"nearest"`` .
+ 'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
+ knows issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
+
  align_corners (bool): If True, rescale input by :math:`(new\_height - 1) / (height - 1)`, which exactly
  aligns the corners of data and resized data. If False, rescale by :math:`new\_height / height`.
  Default: ``None`` .
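A sketch of the ``'nearest-exact'`` option described above, on an illustrative 3-D input (the `size` and `mode` keyword names are taken from the surrounding docstring, not from this hunk alone):
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.arange(8, dtype=np.float32).reshape(1, 2, 4))
>>> ops.interpolate(x, size=8, mode='nearest-exact').shape
(1, 2, 8)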
@@ -2568,10 +2585,12 @@ def soft_margin_loss(input, target, reduction='mean'):
  Args:
  input (Tensor): Predict data. Data type must be float16 or float32.
  target (Tensor): Ground truth data, with the same type and shape as `logits`.
- reduction (str, optional): Implements the reduction method to the output with ``'none'`` , ``'mean'`` ,
- or ``'sum'`` ,
- respectively indicate that no calculation is specified, that the mean is used, and that is calculated
- using summation. Default: ``'mean'`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Outputs:
  Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `logits`.
@@ -2751,6 +2770,55 @@ def soft_shrink(input, lambd=0.5):
  return soft_shrink_op(input)


+ def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
+ r"""
+ Applies softplus function to `input` element-wise.
+
+ The softplus function is shown as follows, x is the element of `input` :
+
+ .. math::
+
+ \text{output} = \frac{1}{beta}\log(1 + \exp(\text{beta * x}))
+
+ When :math:`input * beta > threshold`, the implementation converts to the linear function
+ to ensure numerical stability.
+
+ Args:
+ input (Tensor) - Tensor of any dimension.
+ Supported dtypes:
+
+ - GPU/CPU: float16, float32, float64.
+ - Ascend: float16, float32.
+
+ beta (int, optional) - The :math:`\beta` value in softplus function. Default: ``1`` .
+ threshold (int, optional) - When :math:`input * beta > threshold`, converting softplus to a linear function.
+ Default: ``20`` .
+
+ Returns:
+ Tensor, with the same type and shape as the `input` .
+
+ Raises:
+ TypeError: If `input` is not a Tensor.
+ TypeError: If the dtype of `input` is not float16, float32 or float64.
+
+ Supported Platforms:
+ ``Ascend`` ``GPU`` ``CPU``
+
+ Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
+ >>> input = Tensor(np.array([0.1, 0.2, 30, 25]), mindspore.float32)
+ >>> output = ops.softplus(input)
+ >>> print(output)
+ [0.7443967 0.79813886 30. 25.]
+ """
+ softplus_op = _get_cache_prim(P.Softplus)()
+ scaling_input = beta * input
+ op_output = (1 / beta) * softplus_op(scaling_input)
+ return ops.select(input * beta > threshold, input, op_output)
+
+
  def silu(x):
  r"""
  Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
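The softplus body added above follows the documented formula, output = (1 / beta) * log(1 + exp(beta * x)), switching to the identity once input * beta exceeds threshold. A quick check of the `beta` parameter with an illustrative value:
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.array([0.5]), mindspore.float32)
>>> out = ops.softplus(x, beta=2)   # (1 / 2) * log(1 + exp(2 * 0.5)) is approximately 0.6566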
@@ -2860,7 +2928,7 @@ def sigmoid(input):
  >>> print(output)
  [0.7310586 0.880797 0.95257413 0.98201376 0.9933072 ]
  """
- return sigmoid_(input)
+ return _get_cache_prim(NN_OPS.Sigmoid)()(input)


  def logsigmoid(x):
@@ -2946,11 +3014,19 @@ def dense(input, weight, bias=None):
  _check_is_tensor("bias", bias, "dense")
  weight = ops.t(weight)
  input = ops.matmul(input, weight)
+ input_shape = input.shape
  if bias is not None:
  input = input + bias
+ _check_dense_add_bias_shape(input_shape, input.shape, bias.shape)
  return input


+ def _check_dense_add_bias_shape(input_shape, output_shape, bias_shape):
+ """Check that the output has the correct shape after adding bias."""
+ if input_shape != output_shape:
+ raise ValueError(f"For dense, the bias shape {bias_shape} does not match the input shape {input_shape}.")
+
+
  @_primexpr
  def check_dense_inputs_same_shape(input1_shape, input2_shape, prim_name=None):
  """check bidense input Tensors' shape"""
@@ -2965,7 +3041,10 @@ def bidense(input1, input2, weight, bias=None):
  Applies bilinear dense connected layer for `input1` and `input2`. The bilinear dense function is defined as:

  .. math::
- output = input1^{T} weight input2 + bias
+ output = x_{1}^{T}Ax_{2} + b
+
+ :math:`x_{1}` represents `input1` , :math:`x_{2}` represents `input2` , :math:`A` represents `weight` ,
+ :math:`b` represents `bias` .

  .. warning::
  This is an experimental API that is subject to change or deletion.
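Reading the revised formula, each output feature k is computed as x1^T A_k x2 + b_k. A shape-level sketch, assuming the conventional bilinear weight layout (out_features, in1_features, in2_features), which is not stated in this hunk:
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> x1 = Tensor(np.random.randn(4, 3), mindspore.float32)     # (batch, in1_features)
>>> x2 = Tensor(np.random.randn(4, 2), mindspore.float32)     # (batch, in2_features)
>>> w = Tensor(np.random.randn(5, 3, 2), mindspore.float32)   # (out_features, in1_features, in2_features)
>>> ops.bidense(x1, x2, w).shape
(4, 5)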
@@ -3391,7 +3470,9 @@ def relu6(x):
  It returns :math:`\min(\max(0,x), 6)` element-wise.

  Args:
- x (Tensor): Input Tensor of float16 or float32 data type.
+ x (Tensor): Tensor of shape :math:`(N, *)`,
+ where :math:`*` means any number of additional dimensions.
+ Data type must be float16, float32.

  Returns:
  Tensor, with the same dtype and shape as the `x`.
@@ -3528,6 +3609,9 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
  _lower = Tensor(lower, mstype.float32)
  _upper = Tensor(upper, mstype.float32)
  _size = input.shape
+ if ops.is_sequence_value_unknown(_size):
+ dyn_shape = _get_cache_prim(P.TensorShape)()
+ _size = dyn_shape(input)
  sign_matrix = _get_cache_prim(P.Sign)()(input)
  negative_filter = sign_matrix.clip(None, 0)
  positive_filter = sign_matrix.clip(0, None)
@@ -3615,11 +3699,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
  l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
  \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}

- where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight,
- N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
- classes.
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.

- If reduction is not 'none' (default 'mean'), then
+ If `reduction` is not ``None`` (default ``'mean'`` ), then

  .. math::

@@ -3638,11 +3721,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
  \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
  l_n = - \sum_{c=1}^C w_c \log \frac{\exp(x_{n,c})}{\sum_{i=1}^C \exp(x_{n,i})} y_{n,c}

- where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight,
- N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
- classes.
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.

- If reduction is not 'none' (default 'mean'), then
+ If `reduction` is not ``None`` (default ``'mean'`` ), then

  .. math::

@@ -3658,16 +3740,19 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
  in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
  `input` is expected to be log-probabilities, data type must be float16 or float32.
  target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
- :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32.
- For probabilities, tensor of shape :math:`(C,)` :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` ,
- data type must be float16 or float32.
+ :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32. For probabilities, tensor of shape :math:`(C,)` ,
+ :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32.
  weight (Tensor): A rescaling weight applied to the loss of each batch element.
- If not None, the shape is :math:`(C,)`,
- data type must be float16 or float32. Default: ``None`` .
+ If not None, the shape is :math:`(C,)`, data type must be float16 or float32. Default: ``None`` .
  ignore_index (int): Specifies a target value that is ignored
  and does not contribute to the input gradient. Default: ``-100`` .
- reduction (str): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , or ``'sum'`` .
- Default: ``'mean'`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.
+
  label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
  from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .

@@ -3678,17 +3763,16 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3678
3763
  ``Ascend`` ``GPU`` ``CPU``
3679
3764
 
3680
3765
  Examples:
3681
- >>> import mindspore
3766
+ >>> import mindspore as ms
3682
3767
  >>> import numpy as np
3683
- >>> from mindspore import Tensor, ops
3684
3768
  >>> # Case 1: Indices labels
3685
- >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3686
- >>> target = mindspore.Tensor(np.array([1, 0, 4]), mindspore.int32)
3687
- >>> output = ops.cross_entropy(inputs, target)
3769
+ >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
3770
+ >>> target = ms.Tensor(np.array([1, 0, 4]), ms.int32)
3771
+ >>> output = ms.ops.cross_entropy(inputs, target)
3688
3772
  >>> # Case 2: Probability labels
3689
- >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3690
- >>> target = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3691
- >>> output = ops.cross_entropy(inputs, target)
3773
+ >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
3774
+ >>> target = ms.Tensor(np.random.randn(3, 5), ms.float32)
3775
+ >>> output = ms.ops.cross_entropy(inputs, target)
3692
3776
  """
3693
3777
  _check_is_tensor('input', input, "cross_entropy_loss")
3694
3778
  _check_is_tensor('target', target, "cross_entropy_loss")
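For readers who want to check the class-index formula above numerically, here is a minimal NumPy sketch of the default mean-reduced path (weight, ignore_index and label_smoothing are deliberately omitted; `cross_entropy_sketch` is an illustrative name, not part of the API):

import numpy as np

def cross_entropy_sketch(logits, target):
    # logits: (N, C) raw scores; target: (N,) integer class indices.
    shifted = logits - logits.max(axis=1, keepdims=True)        # for numerical stability
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    per_sample = -log_softmax[np.arange(len(target)), target]   # l_n from the formula above
    return per_sample.mean()                                    # default reduction='mean'

logits = np.random.randn(3, 5).astype(np.float32)
print(cross_entropy_sketch(logits, np.array([1, 0, 4])))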
@@ -3743,7 +3827,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3743
3827
  N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
3744
3828
  classes.
3745
3829
 
3746
- If reduction is not 'none' (default 'mean'), then
3830
+ If `reduction` is not ``'none'`` (default ``'mean'``), then
3747
3831
 
3748
3832
  .. math::
3749
3833
 
@@ -3763,8 +3847,13 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3763
3847
  The data type must be float16 or float32. Default: ``None`` .
3764
3848
  ignore_index (int): Specifies a target value that is ignored
3765
3849
  and does not contribute to the input gradient. Default: ``-100`` .
3766
- reduction (str): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , or ``'sum'`` .
3767
- Default: ``'mean'`` .
3850
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3851
+ ``'sum'`` . Default: ``'mean'`` .
3852
+
3853
+ - ``'none'``: no reduction will be applied.
3854
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
3855
+ - ``'sum'``: the output elements will be summed.
3856
+
3768
3857
  label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
3769
3858
  from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .
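nll_loss applies the same weighting and reduction machinery as cross_entropy, but its `inputs` are already log-probabilities. A hedged NumPy sketch of the 'none' reduction path, again without weight or ignore_index (`nll_loss_sketch` is illustrative only):

import numpy as np

def nll_loss_sketch(log_probs, target):
    # log_probs: (N, C) log-probabilities; target: (N,) integer class indices.
    # Each l_n is simply the negated log-probability of the true class.
    return -log_probs[np.arange(len(target)), target]

log_probs = np.log(np.full((3, 5), 0.2))                  # uniform over 5 classes
print(nll_loss_sketch(log_probs, np.array([1, 0, 4])))    # every element equals -log(0.2)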
3770
3859
 
@@ -3858,7 +3947,7 @@ def l1_loss(input, target, reduction='mean'):
3858
3947
  r"""
3859
3948
  Calculate the mean absolute error between the `input` value and the `target` value.
3860
3949
 
3861
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to "none" ,
3950
+ Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to ``"none"``,
3862
3951
  then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
3863
3952
 
3864
3953
  The formula is as follows:
@@ -3881,18 +3970,21 @@ def l1_loss(input, target, reduction='mean'):
3881
3970
  input (Tensor): Predicted value, Tensor of any dimension.
3882
3971
  target (Tensor): Target value, usually has the same shape as the `input`.
3883
3972
  If `input` and `target` have different shape, make sure they can broadcast to each other.
3884
- reduction (str, optional): Type of reduction to be applied to loss.
3885
- The optional value is ``"mean"`` , ``"sum"`` or
3886
- ``"none"`` . Default: ``'mean'`` .
3973
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3974
+ ``'sum'`` . Default: ``'mean'`` .
3975
+
3976
+ - ``'none'``: no reduction will be applied.
3977
+ - ``'mean'``: compute and return the mean of elements in the output.
3978
+ - ``'sum'``: the output elements will be summed.
3887
3979
 
3888
3980
  Returns:
3889
- Tensor or Scalar, if `reduction` is "none", return a Tensor with same shape and dtype as `input`.
3981
+ Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
3890
3982
  Otherwise, a scalar value will be returned.
3891
3983
 
3892
3984
  Raises:
3893
3985
  TypeError: If `input` is not a Tensor.
3894
3986
  TypeError: If `target` is not a Tensor.
3895
- ValueError: If `reduction` is not one of "none", "mean" or "sum".
3987
+ ValueError: If `reduction` is not one of ``"none"``, ``"mean"`` or ``"sum"``.
3896
3988
 
3897
3989
  Supported Platforms:
3898
3990
  ``Ascend`` ``GPU`` ``CPU``
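The three reduction modes listed above can be summarised in a few lines of NumPy (illustrative sketch; broadcasting and dtype checks omitted):

import numpy as np

def l1_loss_sketch(x, y, reduction='mean'):
    loss = np.abs(x - y)                      # elementwise |x_n - y_n|
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss                               # 'none': same shape as the input

x = np.array([1.0, 2.0, 3.0])
y = np.array([1.5, 2.0, 2.0])
print(l1_loss_sketch(x, y, 'none'), l1_loss_sketch(x, y))   # [0.5 0.  1. ] 0.5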
@@ -3948,16 +4040,20 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
3948
4040
  target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
3949
4041
  beta (float): A parameter used to control the point where the function will change between
3950
4042
  L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
3951
- reduction (str): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` or ``'sum'`` .
3952
- Default: ``'none'`` .
4043
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4044
+ ``'sum'`` . Default: ``'none'`` .
4045
+
4046
+ - ``'none'``: no reduction will be applied.
4047
+ - ``'mean'``: compute and return the mean of elements in the output.
4048
+ - ``'sum'``: the output elements will be summed.
3953
4049
 
3954
4050
  Returns:
3955
- Tensor, if `reduction` is 'none', then output is a tensor with the same shape as `input`.
4051
+ Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `input`.
3956
4052
  Otherwise, the shape of output tensor is :math:`(1,)`.
3957
4053
 
3958
4054
  Raises:
3959
4055
  TypeError: If `beta` is not a float.
3960
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
4056
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
3961
4057
  TypeError: If dtype of `input` or `target` is not one of float16, float32, float64.
3962
4058
  ValueError: If `beta` is less than or equal to 0.
3963
4059
  ValueError: If shape of `input` is not the same as `target`.
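To make the role of `beta` concrete, here is an elementwise NumPy sketch assuming the usual smooth-L1 definition with the quadratic branch scaled by 1/beta (illustrative only; the shape and dtype validation listed above is skipped):

import numpy as np

def smooth_l1_sketch(x, y, beta=1.0):
    diff = np.abs(x - y)
    # quadratic (L2-like) below beta, linear (L1-like) above it; the branches meet at |diff| == beta
    return np.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)

print(smooth_l1_sketch(np.array([0.2, 3.0]), np.zeros(2)))   # [0.02 2.5 ]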
@@ -4072,6 +4168,7 @@ def leaky_relu(input, alpha=0.2):
4072
4168
  select_op = _get_cache_prim(P.Maximum)()
4073
4169
  if alpha > 1:
4074
4170
  select_op = _get_cache_prim(P.Minimum)()
4171
+ alpha = _get_cache_prim(P.Cast)()(F.scalar_to_tensor(alpha), input.dtype)
4075
4172
  return select_op(alpha * input, input)
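The Maximum/Minimum selection in this hunk works because the ordering of `alpha * x` and `x` flips depending on whether `alpha` exceeds 1; a small NumPy illustration of the same trick (not the operator implementation itself):

import numpy as np

def leaky_relu_sketch(x, alpha=0.2):
    # For alpha <= 1: max(alpha*x, x) keeps x for positive inputs and alpha*x for negative ones.
    # For alpha > 1 the inequality flips, so min(alpha*x, x) is the correct selection.
    return np.maximum(alpha * x, x) if alpha <= 1 else np.minimum(alpha * x, x)

print(leaky_relu_sketch(np.array([-2.0, 3.0])))         # [-0.4  3. ]
print(leaky_relu_sketch(np.array([-2.0, 3.0]), 2.0))    # [-4.  3.]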
4076
4173
 
4077
4174
 
@@ -4158,6 +4255,10 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
4158
4255
  r"""
4159
4256
  Local Response Normalization.
4160
4257
 
4258
+ .. warning::
4259
+ lrn is deprecated on Ascend due to a potential accuracy problem. It is recommended to use other
4260
+ normalization methods, e.g. :func:`mindspore.ops.batch_norm`.
4261
+
4161
4262
  .. math::
4162
4263
 
4163
4264
  b_{c} = a_{c}\left(k + \frac{\alpha}{n}
@@ -4186,7 +4287,7 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
4186
4287
  TypeError: If `x` is not a Tensor.
4187
4288
 
4188
4289
  Supported Platforms:
4189
- ``Ascend`` ``GPU`` ``CPU``
4290
+ ``GPU`` ``CPU``
4190
4291
 
4191
4292
  Examples:
4192
4293
  >>> import mindspore
@@ -4219,7 +4320,11 @@ def mish(x):
4219
4320
  <https://arxiv.org/abs/1908.08681>`_.
4220
4321
 
4221
4322
  Args:
4222
- x (Tensor): The input Tensor with float16, float32 or float64 data type.
4323
+ x (Tensor): The input Tensor.
4324
+ Supported dtypes:
4325
+
4326
+ - GPU/CPU: float16, float32, float64.
4327
+ - Ascend: float16, float32.
4223
4328
 
4224
4329
  Returns:
4225
4330
  Tensor, with the same type and shape as the `x`.
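The Mish paper linked above defines the activation as x * tanh(softplus(x)); a minimal NumPy sketch (illustrative, not the kernel implementation):

import numpy as np

def mish_sketch(x):
    softplus = np.log1p(np.exp(x))      # softplus(x) = log(1 + exp(x))
    return x * np.tanh(softplus)

print(mish_sketch(np.array([-1.0, 0.0, 2.0], dtype=np.float32)))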
@@ -4320,10 +4425,40 @@ def _check_type_and_shape_same(param_name1, input_data1, param_name2, input_data
4320
4425
 
4321
4426
 
4322
4427
  def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
4323
- """
4428
+ r"""
4324
4429
  MarginRankingLoss creates a criterion that measures the loss.
4325
4430
 
4326
- For details, please refer to :class:`mindspore.nn.MarginRankingLoss`.
4431
+ Given two tensors :math:`input1`, :math:`input2` and a Tensor label :math:`target` with values 1 or -1,
4432
+ the operation is as follows:
4433
+
4434
+ .. math::
4435
+ \text{loss}(input1, input2, target) = \max(0, -target * (input1 - input2) + \text{margin})
4436
+
4437
+ Args:
4438
+ input1 (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4439
+ input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
4440
+ target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
4441
+ :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_2, x_3, ..., x_R)`.
4442
+ margin (float, optional): Specify the adjustment factor of the operation. Default: ``0.0`` .
4443
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4444
+ ``'sum'`` . Default: ``'mean'`` .
4445
+
4446
+ - ``'none'``: no reduction will be applied.
4447
+ - ``'mean'``: compute and return the mean of elements in the output.
4448
+ - ``'sum'``: the output elements will be summed.
4449
+
4450
+ Returns:
4451
+ Tensor or Scalar. If `reduction` is ``"none"``, its shape is the same as `target`.
4452
+ Otherwise, a scalar value will be returned.
4453
+
4454
+ Raises:
4455
+ TypeError: If `margin` is not a float.
4456
+ TypeError: If `input1`, `input2` or `target` is not a Tensor.
4457
+ TypeError: If the types of `input1` and `input2` are inconsistent.
4458
+ TypeError: If the types of `input1` and `target` are inconsistent.
4459
+ ValueError: If the shape of `input1` and `input2` are inconsistent.
4460
+ ValueError: If the shape of `input1` and `target` are inconsistent.
4461
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` , ``'sum'``.
4327
4462
 
4328
4463
  Supported Platforms:
4329
4464
  ``Ascend`` ``GPU`` ``CPU``
@@ -4334,7 +4469,7 @@ def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
4334
4469
  >>> import numpy as np
4335
4470
  >>> input1 = Tensor(np.array([0.3864, -2.4093, -1.4076]), ms.float32)
4336
4471
  >>> input2 = Tensor(np.array([-0.6012, -1.6681, 1.2928]), ms.float32)
4337
- >>> target = sign(Tensor(np.array([-2, -2, 3]), ms.float32))
4472
+ >>> target = ops.Sign()(Tensor(np.array([-2, -2, 3]), ms.float32))
4338
4473
  >>> output = ops.margin_ranking_loss(input1, input2, target)
4339
4474
  >>> print(output)
4340
4475
  1.2293333
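The printed value can be reproduced directly from the formula documented above; a NumPy sketch with the same inputs (illustrative only):

import numpy as np

input1 = np.array([0.3864, -2.4093, -1.4076])
input2 = np.array([-0.6012, -1.6681, 1.2928])
target = np.sign(np.array([-2.0, -2.0, 3.0]))                  # [-1, -1, 1]
loss = np.maximum(0.0, -target * (input1 - input2) + 0.0)      # margin = 0.0
print(loss.mean())                                             # ~1.2293, as above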
@@ -4375,17 +4510,20 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
4375
4510
  target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
4376
4511
  :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_3, x_4, ..., x_R)`.
4377
4512
  margin (float, optional): Should be in [-1.0, 1.0]. Default: 0.0.
4378
- reduction (str, optional): Specifies which reduction to be applied to the output. It must be one of
4379
- ``"none"`` , ``"mean"`` , and ``"sum"`` ,
4380
- meaning no reduction, reduce mean and sum on output, respectively. Default: ``"mean"`` .
4513
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4514
+ ``'sum'`` . Default: ``'mean'`` .
4515
+
4516
+ - ``'none'``: no reduction will be applied.
4517
+ - ``'mean'``: compute and return the mean of elements in the output.
4518
+ - ``'sum'``: the output elements will be summed.
4381
4519
 
4382
4520
  Returns:
4383
- Tensor or Scalar, if `reduction` is "none", its shape is the same as `target`.
4521
+ Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `target`.
4384
4522
  Otherwise, a scalar value will be returned.
4385
4523
 
4386
4524
  Raises:
4387
4525
  TypeError: If `margin` is not a float.
4388
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
4526
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4389
4527
  ValueError: If `margin` is not in range [-1, 1].
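The hunk above only touches the argument documentation; for orientation, the commonly used cosine-embedding formulation (stated here as an assumption, not quoted from this file) is 1 - cos(x1, x2) for target 1 and max(0, cos(x1, x2) - margin) for target -1:

import numpy as np

def cosine_embedding_sketch(x1, x2, target, margin=0.0):
    cos = (x1 * x2).sum(axis=1) / (np.linalg.norm(x1, axis=1) * np.linalg.norm(x2, axis=1))
    loss = np.where(target == 1, 1.0 - cos, np.maximum(0.0, cos - margin))
    return loss.mean()                                         # reduction='mean'

x1 = np.array([[0.3, 0.8], [0.4, 0.3]])
x2 = np.array([[0.4, 1.2], [-0.4, -0.9]])
print(cosine_embedding_sketch(x1, x2, np.array([1, -1])))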
4390
4528
 
4391
4529
  Supported Platforms:
@@ -4529,14 +4667,24 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4529
4667
 
4530
4668
  Args:
4531
4669
  input (Tensor): input with shape of :math:`(N, C, H_{in}, W_{in})` (4-D case) or :math:`(N, C, D_{in},
4532
- H_{in}, W_{in})` (5-D case) and dtype of float16, float32 or float64.
4670
+ H_{in}, W_{in})` (5-D case) and dtype of float32 or float64.
4533
4671
  grid (Tensor): flow-field with shape of :math:`(N, H_{out}, W_{out}, 2)` (4-D case) or :math:`(N, D_{out},
4534
4672
  H_{out}, W_{out}, 3)` (5-D case) and same dtype as `input`.
4535
4673
  mode (str): An optional string specifying the interpolation method. The optional values are
4536
- 'bilinear', 'nearest'. Default: ``'bilinear'`` . Note: `bicubic` is not supported yet. When
4674
+ ``'bilinear'``, ``'nearest'``. Note: `bicubic` is not supported yet. When
4537
4675
  `mode="bilinear"` and the input is 5-D, the interpolation mode used internally will actually
4538
4676
  be trilinear. However, when the input is 4-D, the interpolation mode will legitimately be bilinear.
4539
4677
  Default: ``'bilinear'`` .
4678
+
4679
+ - ``'nearest'``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
4680
+ nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
4681
+ - ``'bilinear'``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
4682
+ pixels, computed using bilinear interpolation. This method produces smoother results compared
4683
+ to nearest neighbor interpolation.
4684
+ - ``'trilinear'``: Trilinear interpolation. This is an extension of bilinear interpolation to 3D data.
4685
+ It performs bilinear interpolation in the two spatial dimensions and linear interpolation along
4686
+ the third dimension. It is commonly used for volume or 3D image interpolation.
4687
+
4540
4688
  padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
4541
4689
  "reflection". Default: ``'zeros'`` .
4542
4690
  align_corners (bool): An optional bool. If set to `True`, the extrema (-1 and 1) are considered as referring to
@@ -4617,10 +4765,13 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4617
4765
  input_lengths (Union(tuple, Tensor)): Lengths of the input. A tuple or Tensor of shape(N).
4618
4766
  target_lengths (Union(tuple, Tensor)): Lengths of the target. A tuple or Tensor of shape(N).
4619
4767
  blank (int, optional): The blank label. Default: ``0`` .
4620
- reduction (str, optional): Implements the reduction method to the output with
4621
- ``'none'`` , ``'mean'`` , or ``'sum'`` ,
4622
- respectively indicate that no calculation is specified, that the mean is used, and that is calculated
4623
- using summation. Default: ``"mean"`` .
4768
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4769
+ ``'sum'`` . Default: ``'mean'`` .
4770
+
4771
+ - ``'none'``: no reduction will be applied.
4772
+ - ``'mean'``: compute and return the mean of elements in the output.
4773
+ - ``'sum'``: the output elements will be summed.
4774
+
4624
4775
  zero_infinity (bool, optional): Whether to set infinite loss and correlation gradient to 0. Default: ``False`` .
4625
4776
 
4626
4777
  Returns:
@@ -4704,8 +4855,12 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4704
4855
  full (bool, optional): Include the constant term in the loss calculation. When :math:`full=True`,
4705
4856
  the constant term will be :math:`const = 0.5*log(2\pi)`. Default: ``False``.
4706
4857
  eps (float, optional): Used to improve the stability of log function must be greater than 0. Default: ``1e-6`` .
4707
- reduction (str, optional): Apply specific reduction method to the
4708
- output: ``"none"``, ``"mean"``, or ``"sum"``. Default: ``'mean'``.
4858
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4859
+ ``'sum'`` . Default: ``'mean'`` .
4860
+
4861
+ - ``'none'``: no reduction will be applied.
4862
+ - ``'mean'``: compute and return the mean of elements in the output.
4863
+ - ``'sum'``: the output elements will be summed.
4709
4864
 
4710
4865
  Returns:
4711
4866
  Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
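A hedged NumPy sketch consistent with the `full` and `eps` descriptions above (the per-element expression is the standard Gaussian negative log-likelihood, assumed rather than quoted from this hunk):

import numpy as np

def gaussian_nll_sketch(x, target, var, full=False, eps=1e-6):
    var = np.maximum(var, eps)                               # eps keeps log(var) stable
    loss = 0.5 * (np.log(var) + (x - target) ** 2 / var)
    if full:
        loss = loss + 0.5 * np.log(2 * np.pi)                # the constant term const = 0.5*log(2*pi)
    return loss.mean()                                       # reduction='mean'

x = np.arange(8.0).reshape(4, 2)
target = np.array([2, 3, 1, 4, 6, 4, 4, 9], dtype=float).reshape(4, 2)
print(gaussian_nll_sketch(x, target, np.ones_like(x)))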
@@ -4722,8 +4877,7 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4722
4877
 
4723
4878
  Examples:
4724
4879
  >>> import numpy as np
4725
- >>> from mindspore import Tensor
4726
- >>> import mindspore.ops as ops
4880
+ >>> from mindspore import Tensor, ops
4727
4881
  >>> import mindspore.common.dtype as mstype
4728
4882
  >>> arr1 = np.arange(8).reshape((4, 2))
4729
4883
  >>> arr2 = np.array([2, 3, 1, 4, 6, 4, 4, 9]).reshape((4, 2))
@@ -4831,9 +4985,12 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4831
4985
  Has the same shape as `inputs`, contains -1 or 1.
4832
4986
  margin (float, int): Threshold defined by Hinge Embedding Loss :math:`margin`.
4833
4987
  Represented as :math:`\Delta` in the formula. Default: ``1.0`` .
4834
- reduction (str): Specify the computing method to be applied to the outputs:
4835
- ``'none'`` , ``'mean'`` , or ``'sum'`` .
4836
- Default: ``'mean'`` .
4988
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4989
+ ``'sum'`` . Default: ``'mean'`` .
4990
+
4991
+ - ``'none'``: no reduction will be applied.
4992
+ - ``'mean'``: compute and return the mean of elements in the output.
4993
+ - ``'sum'``: the output elements will be summed.
4837
4994
 
4838
4995
  Returns:
4839
4996
  Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
@@ -4843,7 +5000,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4843
5000
  TypeError: If `targets` is not a Tensor.
4844
5001
  TypeError: If `margin` is not a float or int.
4845
5002
  ValueError: If `targets` does not have the same shape as `inputs` or they could not broadcast to each other.
4846
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
5003
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4847
5004
 
4848
5005
  Supported Platforms:
4849
5006
  ``Ascend`` ``GPU`` ``CPU``
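For reference, the usual hinge-embedding rule (x_n for target 1, max(0, margin - x_n) for target -1, with `margin` playing the role of the Δ mentioned above) in NumPy; this is an illustrative sketch, not the operator:

import numpy as np

def hinge_embedding_sketch(inputs, targets, margin=1.0):
    loss = np.where(targets == 1, inputs, np.maximum(0.0, margin - inputs))
    return loss.mean()                                      # reduction='mean'

inputs = np.array([0.3, 0.7, 0.5])
targets = np.array([1, -1, -1])
print(hinge_embedding_sketch(inputs, targets))              # (0.3 + 0.3 + 0.5) / 3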
@@ -4889,6 +5046,9 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
4889
5046
  r"""
4890
5047
  Performs greedy decoding on the logits given in inputs.
4891
5048
 
5049
+ Note:
5050
+ On Ascend, `merge_repeated` cannot be set to ``False``.
5051
+
4892
5052
  Args:
4893
5053
  inputs (Tensor): The input Tensor must be a 3-D tensor whose shape is
4894
5054
  :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes,
@@ -5068,74 +5228,87 @@ def _check_conv_iterable_lengths(iterable, dim, iter_name):
5068
5228
 
5069
5229
  def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5070
5230
  r"""
5071
- Applies a 1D convolution over an input tensor.
5072
- The input tensor is typically of shape :math:`(N, C_{in}, W_{in})`,
5073
- where :math:`N` is batch size, :math:`C_{in}` is channel number, :math:`W` is width, :math:`X_i` is
5074
- the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
5075
- For each batch of shape :math:`(C_{in}, W_{in})`, the formula is defined as:
5231
+ Applies a 1D convolution over an input tensor. The input Tensor is typically
5232
+ of shape :math:`(N, C_{in}, L_{in})`,
5233
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is input sequence length.
5234
+
5235
+ The output is calculated based on formula:
5076
5236
 
5077
5237
  .. math::
5078
5238
 
5079
- out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{j}, X_i) + b_j,
5239
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5240
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5241
+
5242
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5243
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5244
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5245
+
5246
+ Here are the indices' meanings:
5247
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5248
+
5249
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5250
+ output channels, which is also equal to the number of kernels.
5080
5251
 
5081
- where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ operator,
5082
- :math:`C_{in}` is the input channel number, :math:`j` ranges
5083
- from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
5084
- filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{j}` is a slice
5085
- of kernel, and it has shape :math:`(\text{kernal_size})`, where :math:`\text{kernel_size}` is the width of
5086
- the convolution kernel. The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
5087
- where `groups` is the group number to split the input in the channel dimension.
5252
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5253
+ input channels, which is also equal to the number of channels in the convolutional kernels.
5088
5254
 
5089
- If the `pad_mode` is set to be "valid", the output width will be :math:`\left \lfloor{
5090
- 1 + \frac{W_{in} + \text{padding[0]} - \text{kernel_size} - (\text{kernel_size} - 1) \times(\text{dilation} - 1)}
5091
- {\text { stride }}} \right \rfloor`.
5255
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5256
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5257
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5258
+ channel in the :math:`i`-th batch of the input feature map.
5092
5259
 
5093
- where :math:`dilation` is spacing between kernel elements, :math:`stride` is The step length of each step,
5094
- :math:`padding` is zero-padding added to both sides of the input.
5095
- For output width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv1d
5096
- <https://www.mindspore.cn/docs/en/r2.1/api_python/nn/mindspore.nn.Conv2d.html>`_.
5260
+ The shape of the convolutional kernel is given by :math:`(kernel\_size)`,
5261
+ where :math:`kernel\_size` is the width of the kernel.
5262
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5263
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
5264
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5097
5265
 
5098
- The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
5099
- <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
5100
- `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5266
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5267
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
5268
+ and `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5101
5269
 
5102
5270
  Note:
5103
5271
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
5104
5272
  That is, when `groups>1`, condition `C_{in}` = `C_{out}` = `groups` must be satisfied.
5105
5273
 
5106
5274
  Args:
5107
- input (Tensor): Tensor of shape :math:`(N, C_{in}, W_{in})`.
5108
- weight (Tensor): Tensor of shape
5109
- :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`, then the size of kernel is
5110
- :math:`(\text{kernel_size})`.
5111
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
5275
+ input (Tensor): Input Tensor of shape :math:`(N, C_{in}, L_{in})`.
5276
+ weight (Tensor): The convolutional kernel value; it should have shape
5277
+ :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`.
5278
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5112
5279
  When bias is None, zeros will be used. Default: ``None`` .
5113
5280
  stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number or a tuple of one int
5114
- that represents width of movement. Default: 1.
5281
+ that represents width of movement. Default: ``1``.
5115
5282
  pad_mode (str, optional): Specifies padding mode. The optional values are
5116
5283
  ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
5117
5284
 
5118
- - same: Adopts the way of completion. The height and width of the output will be equal to
5285
+ - ``"same"``: Adopts the way of completion. The height and width of the output will be equal to
5119
5286
  the input `x` divided by stride. The padding will be evenly distributed to the left and right where possible.
5120
5287
  Otherwise, the last extra padding will be calculated from the right side.
5121
5288
  If this mode is set, `padding` must be 0.
5122
5289
 
5123
- - valid: Adopts the way of discarding. The possible largest width of output will be returned
5290
+ - ``"valid"``: Adopts the way of discarding. The possible largest width of output will be returned
5124
5291
  without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.
5125
5292
 
5126
- - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the input
5293
+ - ``"pad"``: Implicit paddings on both sides of the input `x`.
5294
+ The number of `padding` will be padded to the input
5127
5295
  Tensor borders. `padding` must be greater than or equal to 0.
5128
- padding (Union(int, tuple[int], list[int]), optional): Implicit paddings on both sides of `input`, meaning the
5296
+ padding (Union(int, tuple[int], list[int]), optional): Specifies the amount of padding to apply on
5297
+ both sides of `input` when `pad_mode` is set to ``"pad"``. The
5129
5298
  paddings of left and right are the same, equal to padding or padding[0] when padding is a tuple of
5130
5299
  1 integer. Default: ``0`` .
5131
- dilation (Union(int, tuple[int]), optional): Gaps between kernel elements. The data type is int or a tuple of
5132
- 1 integer. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
5133
- there will be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater than or
5134
- equal to 1 and bounded by the width of `input`. Default: ``1`` .
5300
+ dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
5301
+ It can be a single int or a tuple of 1 integer.
5302
+ Assuming :math:`dilation=(d0,)`, the convolutional kernel samples the input with a
5303
+ spacing of :math:`d0-1` elements in the width direction.
5304
+ The value should be in the ranges [1, L].
5305
+ Default: ``1`` .
5135
5306
  groups (int, optional): Splits `input` into groups. Default: ``1`` .
5136
5307
 
5137
5308
  Returns:
5138
- Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, W_{out})`.
5309
+ Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
5310
+ To see how different pad modes affect the output shape, please refer to
5311
+ :class:`mindspore.nn.Conv1d` for more details.
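The cross-correlation sum written out above can be checked with a naive NumPy loop for a single batch element (stride 1, no padding, no dilation, groups=1; `conv1d_sketch` is illustrative and deliberately slow):

import numpy as np

def conv1d_sketch(x, weight, bias=None):
    # x: (C_in, L_in); weight: (C_out, C_in, kernel_size)
    c_out, c_in, k = weight.shape
    assert x.shape[0] == c_in
    l_out = x.shape[1] - k + 1
    out = np.zeros((c_out, l_out))
    for j in range(c_out):                   # output channel j
        for t in range(l_out):               # output position t
            out[j, t] = np.sum(weight[j] * x[:, t:t + k])
    return out if bias is None else out + bias[:, None]

x = np.random.randn(4, 16)                   # C_in=4, L_in=16
w = np.random.randn(8, 4, 3)                 # C_out=8, kernel_size=3
print(conv1d_sketch(x, w).shape)             # (8, 14)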
5139
5312
 
5140
5313
  Raises:
5141
5314
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -5204,40 +5377,44 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5204
5377
 
5205
5378
  def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5206
5379
  r"""
5207
- Applies a 2D convolution over an input tensor.
5208
- The input tensor is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
5209
- where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is height, :math:`W` is width, :math:`X_i` is
5210
- the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
5211
- For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
5380
+ Applies a 2D convolution over an input tensor. The input tensor is typically of
5381
+ shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
5382
+ channel number, :math:`H` is feature height, :math:`W` is feature width.
5383
+
5384
+ The output is calculated based on formula:
5212
5385
 
5213
5386
  .. math::
5214
5387
 
5215
- out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
5388
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5389
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5216
5390
 
5217
- where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ operator,
5218
- :math:`C_{in}` is the input channel number, :math:`j` ranges
5219
- from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
5220
- filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
5221
- of kernel, and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, where :math:`\text{
5222
- kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution kernel.
5223
- The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5224
- where `groups` is the group number to split the input in the channel dimension.
5391
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5392
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5393
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5225
5394
 
5226
- If the `pad_mode` is set to be "valid", the output height and width will be :math:`\left \lfloor{
5227
- 1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
5228
- (\text{kernel_size[0]} - 1) \times(\text{dilation[0]} - 1)} {\text { stride[0] }}} \right \rfloor` and
5395
+ Here are the indices' meanings:
5396
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5229
5397
 
5230
- :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
5231
- (\text{kernel_size[1]} - 1) \times(\text{dilation[1]} - 1)} {\text { stride[1] }}} \right \rfloor` respectively.
5398
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5399
+ output channels, which is also equal to the number of kernels.
5232
5400
 
5233
- where :math:`dilation` is spacing between kernel elements, :math:`stride` is The step length of each step,
5234
- :math:`padding` is zero-padding added to both sides of the input.
5235
- For output height and width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv2d
5236
- <https://www.mindspore.cn/docs/en/r2.1/api_python/nn/mindspore.nn.Conv2d.html>`_.
5401
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5402
+ input channels, which is also equal to the number of channels in the convolutional kernels.
5237
5403
 
5238
- The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
5239
- <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
5240
- `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5404
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5405
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5406
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5407
+ channel in the :math:`i`-th batch of the input feature map.
5408
+
5409
+ The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
5410
+ where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
5411
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5412
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5413
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5414
+
5415
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5416
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
5417
+ `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.
5241
5418
 
5242
5419
  Note:
5243
5420
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
@@ -5248,7 +5425,7 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5248
5425
  weight (Tensor): Tensor of shape
5249
5426
  :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
5250
5427
  is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
5251
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
5428
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5252
5429
  When bias is ``None`` , zeros will be used. Default: ``None`` .
5253
5430
  stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
5254
5431
  the height and width of movement are both strides, or a tuple of two int numbers that
@@ -5278,6 +5455,9 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5278
5455
 
5279
5456
  Returns:
5280
5457
  Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
5458
+ To see how different pad modes affect the output shape, please refer to
5459
+ :class:`mindspore.nn.Conv2d` for more details.
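The same sum in the 2-D case, again for one batch element with stride 1 and no padding or dilation (purely illustrative, not the operator implementation):

import numpy as np

def conv2d_sketch(x, weight):
    # x: (C_in, H, W); weight: (C_out, C_in, kH, kW)
    c_out, c_in, kh, kw = weight.shape
    assert x.shape[0] == c_in
    h_out, w_out = x.shape[1] - kh + 1, x.shape[2] - kw + 1
    out = np.zeros((c_out, h_out, w_out))
    for j in range(c_out):
        for i in range(h_out):
            for t in range(w_out):
                out[j, i, t] = np.sum(weight[j] * x[:, i:i + kh, t:t + kw])
    return out

x = np.random.randn(3, 10, 12)
w = np.random.randn(5, 3, 3, 3)
print(conv2d_sketch(x, w).shape)             # (5, 8, 10)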
5460
+
5281
5461
 
5282
5462
  Raises:
5283
5463
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -5421,8 +5601,9 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
5421
5601
  Calculates the error between the predicted value and the target value,
5422
5602
  which has the best of both the loss of l1 and the loss of mse.
5423
5603
 
5424
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the reduction parameter is set to "none"
5425
- then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction. The formula is as follows:
5604
+ Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the `reduction` parameter
5605
+ is set to ``"none"`` then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
5606
+ The formula is as follows:
5426
5607
 
5427
5608
  .. math::
5428
5609
  \ell(x, y) = L = \{l_1,\dots,l_N\}^\top
@@ -5451,21 +5632,25 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
5451
5632
  target (Tensor): Target value, has same dtype and shape as the `input` in common cases.
5452
5633
  However, when the shape of `target` is different from the shape of `input`,
5453
5634
  and they should be broadcasted to each other.
5454
- reduction (str): Type of reduction to be applied to loss.
5455
- The optional values are ``'mean'`` , ``'sum'`` and ``'none'`` .
5456
- Default: ``'mean'``.
5635
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5636
+ ``'sum'`` . Default: ``'mean'`` .
5637
+
5638
+ - ``'none'``: no reduction will be applied.
5639
+ - ``'mean'``: compute and return the mean of elements in the output.
5640
+ - ``'sum'``: the output elements will be summed.
5641
+
5457
5642
  delta (Union[int, float]): The threshold to change between two type of loss.
5458
5643
  The value must be greater than zero. Default: ``1.0`` .
5459
5644
 
5460
5645
  Returns:
5461
- Tensor or Scalar, if `reduction` is "none", return a Tensor with same shape and dtype as `input`.
5646
+ Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
5462
5647
  Otherwise, a scalar value will be returned.
5463
5648
 
5464
5649
  Raises:
5465
5650
  TypeError: If `input` or `target` is not a Tensor.
5466
5651
  TypeError: If dtype of `delta` is neither float nor int.
5467
5652
  ValueError: If `delta` is less than or equal to 0.
5468
- ValueError: If `reduction` is not one of "none", "mean", "sum".
5653
+ ValueError: If `reduction` is not one of ``"none"``, ``"mean"``, ``"sum"``.
5469
5654
  ValueError: If `input` and `target` have different shapes and cannot be broadcasted to each other.
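A compact NumPy sketch of how `delta` switches between the MSE-like and L1-like branches (the standard Huber form is assumed here; reductions follow the list above):

import numpy as np

def huber_sketch(x, y, delta=1.0, reduction='mean'):
    diff = np.abs(x - y)
    loss = np.where(diff < delta, 0.5 * diff ** 2, delta * (diff - 0.5 * delta))
    return {'mean': loss.mean(), 'sum': loss.sum(), 'none': loss}[reduction]

print(huber_sketch(np.array([0.5, 4.0]), np.zeros(2), reduction='none'))   # [0.125 3.5  ]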
5470
5655
 
5471
5656
  Supported Platforms:
@@ -5655,15 +5840,20 @@ def bias_add(input_x, bias):
5655
5840
  consistent with the shape of the `input_x` Tensor.
5656
5841
 
5657
5842
  Args:
5658
- input_x (Tensor): The input tensor. The shape can be 2-5 dimensions.
5659
- bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of `input_x`.
5843
+ input_x (Tensor): The input tensor. The shape can be 2-5 dimensions. Supported dtypes:
5844
+
5845
+ - Ascend/CPU: all Number type.
5846
+ - GPU: float16, float32, int8.
5847
+
5848
+ bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
5849
+ `input_x`. It has the same type as `input_x`.
5660
5850
 
5661
5851
  Returns:
5662
5852
  Tensor, with the same shape and data type as `input_x`.
5663
5853
 
5664
5854
  Raises:
5665
5855
  TypeError: If `input_x` or `bias` is not a Tensor.
5666
- TypeError: If dtype of `input_x` or `bias` is inconsistent.
5856
+ TypeError: If the dtypes of `input_x` and `bias` are inconsistent.
5667
5857
  TypeError: If dimension of `input_x` is not in the range [2, 5].
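bias_add is a channel-wise broadcast add; a NumPy sketch for a 4-D NCHW input (the layout is assumed purely for illustration):

import numpy as np

def bias_add_sketch(input_x, bias):
    # input_x: (N, C, H, W); bias: (C,) broadcast along the channel dimension
    return input_x + bias.reshape(1, -1, 1, 1)

x = np.zeros((2, 3, 4, 4), dtype=np.float32)
b = np.array([1.0, 2.0, 3.0], dtype=np.float32)
print(bias_add_sketch(x, b)[0, :, 0, 0])     # [1. 2. 3.]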
5668
5858
 
5669
5859
  Supported Platforms:
@@ -5718,11 +5908,12 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5718
5908
  the loss function
5719
5909
  will not consider any sample weights, and each sample will be treated as having equal importance
5720
5910
  when calculating the loss.
5721
- reduction (str, optional): Specify the protocol calculation method used to output the results.
5722
- Its value must be one of ``'none'`` , ``'mean'`` or ``'sum'`` ,
5723
- respectively indicate that no calculation method is
5724
- specified, using the average value for calculation, and using summation for calculation, not case-sensitive.
5725
- Default: ``'mean'`` .
5911
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5912
+ ``'sum'`` . Default: ``'mean'`` .
5913
+
5914
+ - ``'none'``: no reduction will be applied.
5915
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
5916
+ - ``'sum'``: the output elements will be summed.
5726
5917
 
5727
5918
  Returns:
5728
5919
  Tensor or Scalar. Returns Tensor that has the same dtype and shape as `logits` if `reduction` is 'none'.
@@ -5731,7 +5922,7 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5731
5922
  Raises:
5732
5923
  TypeError: If `logits`, `labels` or `weight` is not a Tensor.
5733
5924
  TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
5734
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
5925
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
5735
5926
  ValueError: If shape of `labels` is not the same as `logits` or `weight` (if given).
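For orientation, the per-element binary cross-entropy (with the `logits` argument assumed to hold probabilities in (0, 1)) and the three reductions, as a hedged NumPy sketch:

import numpy as np

def bce_sketch(probs, labels, weight=None, reduction='mean'):
    loss = -(labels * np.log(probs) + (1 - labels) * np.log(1 - probs))
    if weight is not None:
        loss = loss * weight                  # per-element rescaling weight
    return {'mean': loss.mean(), 'sum': loss.sum(), 'none': loss}[reduction]

p = np.array([0.9, 0.2, 0.7])
y = np.array([1.0, 0.0, 1.0])
print(bce_sketch(p, y))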
5736
5927
 
5737
5928
  Supported Platforms:
@@ -5754,32 +5945,46 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5754
5945
 
5755
5946
  def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5756
5947
  r"""
5757
- Applies a 3D convolution over an input tensor. The input tensor is typically of shape
5758
- :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
5759
- :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, where :math:`N` is batch size, :math:`C` is channel number,
5760
- :math:`D` is depth, :math:`H, W` is feature height and width respectively.
5761
- the output value of a layer is calculated as:
5948
+ Applies a 3D convolution over an input tensor. The input tensor is typically of
5949
+ shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C`
5950
+ is channel number, :math:`D` is feature depth, :math:`H` is feature height, :math:`W` is feature width.
5951
+
5952
+ The output is calculated based on formula:
5762
5953
 
5763
5954
  .. math::
5764
- \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
5765
- \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
5766
- \operatorname{input}\left(N_{i}, k\right))
5767
-
5768
- where :math:`k` is kernel,
5769
- :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ ,
5770
- :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
5771
- the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
5772
- is a convolution kernel slice with shape
5773
- :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
5774
- where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are
5775
- the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter
5776
- and :math:`\text{X}` is the input tensor.
5777
- The shape of full convolution kernel is
5778
- :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
5779
- where `groups` is the number of groups to split `input` in the channel dimension.
5780
-
5781
- For more details, please refer to the paper `Gradient Based Learning Applied to Document
5782
- Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ .
5955
+
5956
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5957
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5958
+
5959
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5960
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5961
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5962
+
5963
+ Here are the indices' meanings:
5964
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5965
+
5966
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5967
+ output channels, which is also equal to the number of kernels.
5968
+
5969
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5970
+ input channels, which is also equal to the number of channels in the convolutional kernels.
5971
+
5972
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5973
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5974
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5975
+ channel in the :math:`i`-th batch of the input feature map.
5976
+
5977
+ The shape of the convolutional kernel is given by
5978
+ :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
5979
+ where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
5980
+ height and width of the kernel, respectively.
5981
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5982
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
5983
+ \text{kernel_size[1]}, \text{kernel_size[2]})`,
5984
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5985
+
5986
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5987
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
5783
5988
 
5784
5989
  Note:
5785
5990
  1. On Ascend platform, `groups = 1` must be satisfied.
@@ -5790,8 +5995,8 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5790
5995
  weight (Tensor): Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]},
5791
5996
  \text{kernel_size[2]})`, then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]},
5792
5997
  \text{kernel_size[1]}, \text{kernel_size[1]})`.
5793
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`. When bias is None, zeros will be used. Default:
5794
- ``None`` .
5998
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5999
+ When bias is None, zeros will be used. Default: ``None`` .
5795
6000
  stride (Union[int, tuple[int]], optional): The distance of kernel moving,
5796
6001
  it can be an int number that represents
5797
6002
  the depth, height and width of movement or a tuple of three int numbers that
@@ -5799,18 +6004,18 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5799
6004
  pad_mode (str, optional): Specifies padding mode. The optional values are
5800
6005
  ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
5801
6006
 
5802
- - same: Adopts the way of completion. The depth, height and width of the output will be equal to
6007
+ - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be equal to
5803
6008
  the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
5804
6009
  left and right directions where possible.
5805
6010
  Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
5806
6011
  If this mode is set, `pad` must be 0.
5807
6012
 
5808
- - valid: Adopts the way of discarding. The possible largest depth, height and width of output
6013
+ - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
5809
6014
  will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
5810
6015
  must be 0.
5811
6016
 
5812
- - pad: Implicit paddings on both sides of the input in depth, height and width. The number of `pad` will
5813
- be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
6017
+ - ``"pad"``: Implicit paddings on both sides of the input in depth, height and width.
6018
+ The number of `pad` will be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
5814
6019
 
5815
6020
  padding (Union[int, tuple[int], list[int]], optional): The pad value to be filled. If `pad` is an integer,
5816
6021
  the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
@@ -5828,7 +6033,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5828
6033
  Returns:
5829
6034
  Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
5830
6035
 
5831
- `pad_mode` is 'same':
6036
+ `pad_mode` is ``"same"``:
5832
6037
 
5833
6038
  .. math::
5834
6039
  \begin{array}{ll} \\
@@ -5837,7 +6042,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5837
6042
  W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
5838
6043
  \end{array}
5839
6044
 
5840
- `pad_mode` is 'valid':
6045
+ `pad_mode` is ``"valid"``:
5841
6046
 
5842
6047
  .. math::
5843
6048
  \begin{array}{ll} \\
@@ -5849,7 +6054,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5849
6054
  {\text{stride[2]}} + 1} \right \rfloor \\
5850
6055
  \end{array}
5851
6056
 
5852
- `pad_mode` is 'pad':
6057
+ `pad_mode` is ``"pad"``:
5853
6058
 
5854
6059
  .. math::
5855
6060
  \begin{array}{ll} \\
@@ -6082,7 +6287,7 @@ def glu(x, axis=-1):
6082
6287
  TypeError: If `x` is not a Tensor.
6083
6288
 
6084
6289
  Supported Platforms:
6085
- ``Ascend`` ``CPU``
6290
+ ``Ascend`` ``GPU`` ``CPU``
6086
6291
 
6087
6292
  Examples:
6088
6293
  >>> from mindspore import Tensor, ops
@@ -6128,12 +6333,12 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
6128
6333
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6129
6334
  ``'sum'`` . Default: ``'mean'`` .
6130
6335
 
6131
- - ``'none'`` : no reduction will be applied.
6132
- - ``'mean'`` : the sum of the output will be divided by the number of elements in the output.
6133
- - ``'sum'`` : the output will be summed.
6336
+ - ``'none'``: no reduction will be applied.
6337
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
6338
+ - ``'sum'``: the output elements will be summed.
6134
6339
 
6135
6340
  Returns:
6136
- Tensor. If `reduction` is 'none', returns a Tensor with the same shape as `target`.
6341
+ Tensor. If `reduction` is ``'none'``, returns a Tensor with the same shape as `target`.
6137
6342
  Otherwise, it is a scalar.
6138
6343
 
6139
6344
  Raises:
@@ -6200,13 +6405,14 @@ def multilabel_margin_loss(input, target, reduction='mean'):
6200
6405
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6201
6406
  ``'sum'`` . Default: ``'mean'`` .
6202
6407
 
6203
- - ``'none'`` : no reduction will be applied.
6204
- - ``'mean'`` : the sum of the output will be divided by the number of elements in the output.
6205
- - ``'sum'`` : the output will be summed.
6408
+ - ``'none'``: no reduction will be applied.
6409
+ - ``'mean'``: compute and return the mean of elements in the output.
6410
+ - ``'sum'``: the output elements will be summed.
6206
6411
 
6207
6412
  Returns:
6208
- - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is "none", its shape
6209
- is :math:`(N)`. Otherwise, a scalar value will be returned.
6413
+ - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss.
6414
+ If `reduction` is ``"none"``, its shape is :math:`(N)`.
6415
+ Otherwise, a scalar value will be returned.
6210
6416
 
6211
6417
  Raises:
6212
6418
  TypeError: If `input` or `target` is not a Tensor.
@@ -6214,7 +6420,7 @@ def multilabel_margin_loss(input, target, reduction='mean'):
6214
6420
  TypeError: If dtype of `target` is not int32.
6215
6421
  ValueError: If length of shape of `input` is neither 1 nor 2.
6216
6422
  ValueError: If shape of `input` is not the same as `target`.
6217
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
6423
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
6218
6424
 
6219
6425
  Supported Platforms:
6220
6426
  ``Ascend`` ``GPU``
@@ -6260,12 +6466,15 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
6260
6466
  input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
6261
6467
  target (Tensor): The label target Tensor which has the same shape as `input`.
6262
6468
  weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: ``None``.
6263
- reduction (str): Specifies which reduction to be applied to the output. It must be one of
6264
- ``'none'`` , ``'mean'`` , and ``'sum'`` , meaning no reduction, reduce mean and sum on output, respectively.
6265
- Default: ``'mean'`` .
6469
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6470
+ ``'sum'`` . Default: ``'mean'`` .
6471
+
6472
+ - ``'none'``: no reduction will be applied.
6473
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
6474
+ - ``'sum'``: the output elements will be summed.
6266
6475
 
6267
6476
  Returns:
6268
- Tensor, the data type is the same as input, if the reduction is 'none',
6477
+ Tensor, the data type is the same as input, if the `reduction` is ``'none'``,
6269
6478
  its shape is :math:`(N)` , otherwise it is zero.
6270
6479
 
6271
6480
  Raises:
@@ -6409,15 +6618,15 @@ def gelu(input_x, approximate='none'):
6409
6618
 
6410
6619
  x_dtype = _get_cache_prim(P.DType)()(input_x)
6411
6620
  if x_dtype not in [mstype.float16, mstype.float32, mstype.float64]:
6412
- raise TypeError("For gelu, the input dtype must be float16, float32 or float64, "
6413
- "but got {}.".format(x_dtype))
6621
+ raise TypeError(f"For gelu, the input dtype must be float16, float32 or float64, "
6622
+ f"but got {x_dtype}.")
6414
6623
  if approximate == 'tanh':
6415
6624
  output = _get_cache_prim(P.GeLU)()(input_x)
6416
6625
  else:
6417
- output = _get_cache_prim(P.Sqrt)()(Tensor(2.0))
6626
+ output = _get_cache_prim(P.Sqrt)()(Tensor(2.0, x_dtype))
6418
6627
  output = _get_cache_prim(P.Div)()(input_x, output)
6419
- output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0)
6420
- output = input_x * output * Tensor(0.5)
6628
+ output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0, x_dtype)
6629
+ output = input_x * output * Tensor(0.5, x_dtype)
6421
6630
 
6422
6631
  return output
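The else-branch above computes the exact GELU, x * 0.5 * (1 + erf(x / sqrt(2))); a NumPy sketch comparing it with the commonly used tanh approximation (the approximation constants are an assumption, not taken from this file):

import math
import numpy as np

def gelu_exact(x):
    erf = np.vectorize(math.erf)
    return x * 0.5 * (1.0 + erf(x / math.sqrt(2.0)))

def gelu_tanh(x):
    return 0.5 * x * (1.0 + np.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

x = np.linspace(-3, 3, 7)
print(np.max(np.abs(gelu_exact(x) - gelu_tanh(x))))   # the two curves stay close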
6423
6632
 
@@ -6655,8 +6864,12 @@ def mse_loss(input, target, reduction='mean'):
6655
6864
  target (Tensor): The input label. Tensor of any dimension, same shape as the `input` in common cases.
6656
6865
  However, it supports that the shape of `input` is different from the shape of `target`
6657
6866
  and they should be broadcasted to each other.
6658
- reduction (str, optional): Type of reduction to be applied to loss.
6659
- The optional values are ``"mean"`` , ``"none"`` and ``"sum"`` . Default: ``'mean'`` .
6867
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6868
+ ``'sum'`` . Default: ``'mean'`` .
6869
+
6870
+ - ``'none'``: no reduction will be applied.
6871
+ - ``'mean'``: compute and return the mean of elements in the output.
6872
+ - ``'sum'``: the output elements will be summed.
6660
6873
 
6661
6874
  Returns:
6662
6875
  Tensor, loss of type float, the shape is zero if `reduction` is ``'mean'`` or ``'sum'`` ,
@@ -6759,11 +6972,15 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
6759
6972
  eps (float, optional): Add small value to avoid division by zero. Default: ``1e-06``.
6760
6973
  swap (bool, optional): The distance swap change the negative distance to the distance between positive
6761
6974
  sample and negative sample. Default: ``False`` .
6762
- reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` , ``'sum'`` .
6763
- Default: ``'mean'`` .
6975
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6976
+ ``'sum'`` . Default: ``'mean'`` .
6977
+
6978
+ - ``'none'``: no reduction will be applied.
6979
+ - ``'mean'``: compute and return the mean of elements in the output.
6980
+ - ``'sum'``: the output elements will be summed.
6764
6981
 
6765
6982
  Returns:
6766
- Tensor. If `reduction` is "none", its shape is :math:`(N)`. Otherwise, a scalar value will be returned.
6983
+ Tensor. If `reduction` is ``"none"``, its shape is :math:`(N)`. Otherwise, a scalar value will be returned.
6767
6984
 
6768
6985
  Raises:
6769
6986
  TypeError: If `anchor` or `positive` or 'negative' is not a Tensor.
@@ -6776,7 +6993,7 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
6776
6993
  same time.
6777
6994
  ValueError: If the dimension of input `anchor` or `positive` or `negative` is bigger than or equal to 8.
6778
6995
  ValueError: If shape of `anchor`, `positive` and `negative` cannot broadcast.
6779
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
6996
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
6780
6997
 
6781
6998
  Supported Platforms:
6782
6999
  ``GPU``
@@ -6811,7 +7028,7 @@ def linear(x, w, b):
6811
7028
  def _inner_dropout(x, p, training):
6812
7029
  """inner dropout"""
6813
7030
  _dropout = _get_cache_prim(P.Dropout)(1 - p)
6814
- if p > 0. and training:
7031
+ if 0. < p <= 1. and training:
6815
7032
  return _dropout(x)[0]
6816
7033
  return x
6817
7034
 
@@ -6864,10 +7081,11 @@ def _in_projection_packed(q, k, v, w, b, k_is_v, q_is_k):
6864
7081
  return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
6865
7082
 
6866
7083
 
6867
- def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training):
7084
+ def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training, dtype):
6868
7085
  """scaled dot product attention"""
6869
7086
  embed_size = query.shape[-1]
6870
- scaling_factor = Tensor(embed_size, mstype.float32).sqrt().sqrt()
7087
+ embed_size_tensor = scalar_to_tensor_(embed_size, dtype)
7088
+ scaling_factor = embed_size_tensor.sqrt().sqrt()
6871
7089
  query = query / scaling_factor
6872
7090
 
6873
7091
  if is_causal:
@@ -6960,7 +7178,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
6960
7178
  out_proj_bias, training=True, key_padding_mask=None, attn_mask=None,
6961
7179
  use_separate_proj_weight=False, q_proj_weight=None, k_proj_weight=None,
6962
7180
  v_proj_weight=None, static_k=None, static_v=None, average_attn_weights=True,
6963
- is_causal=False, k_is_v=False, q_is_k=False):
7181
+ is_causal=False, k_is_v=False, q_is_k=False, dtype=mstype.float32):
6964
7182
  """multi head attetion forward function"""
6965
7183
  is_batched = _check_qkv_shape(query.ndim, key.ndim, value.ndim)
6966
7184
  if key_padding_mask is not None:
@@ -7117,7 +7335,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
7117
7335
  v = v.view((bsz, num_heads, src_len, head_dim))
7118
7336
 
7119
7337
  attn_output, attn_output_weights = _scaled_dot_product_attention(
7120
- q, k, v, attn_mask, dropout_p, is_causal, training)
7338
+ q, k, v, attn_mask, dropout_p, is_causal, training, dtype)
7121
7339
  attn_output = attn_output.transpose(2, 0, 1, 3).view((bsz * tgt_len, embed_dim))
7122
7340
 
7123
7341
  attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
@@ -7260,6 +7478,7 @@ __all__ = [
7260
7478
  'softsign',
7261
7479
  'softshrink',
7262
7480
  'soft_shrink',
7481
+ 'softplus',
7263
7482
  'selu',
7264
7483
  'silu',
7265
7484
  'soft_margin_loss',