mindspore 2.0.0rc1__cp38-cp38-manylinux1_x86_64.whl → 2.2.0__cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (884)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +2 -2
  3. mindspore/__init__.py +5 -2
  4. mindspore/_akg/akg/build_module.py +5 -6
  5. mindspore/_akg/akg/composite/build_module.py +49 -16
  6. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  7. mindspore/_akg/akg/config/repository.json +195 -0
  8. mindspore/_akg/akg/global_configs.py +5 -1
  9. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  10. mindspore/_akg/akg/tvm/api.py +4 -3
  11. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  12. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  13. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  14. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  15. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  16. mindspore/_akg/akg/tvm/build_module.py +16 -1
  17. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  18. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  19. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  20. mindspore/_akg/akg/tvm/module.py +1 -2
  21. mindspore/_akg/akg/tvm/stmt.py +2 -2
  22. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  23. mindspore/_akg/akg/utils/kernel_exec.py +58 -260
  24. mindspore/_akg/akg/utils/op_dsl.py +17 -1
  25. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  26. mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
  27. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  28. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  29. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  30. mindspore/_check_jit_forbidden_api.py +5 -1
  31. mindspore/_checkparam.py +79 -62
  32. mindspore/_extends/graph_kernel/__init__.py +0 -1
  33. mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
  34. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  35. mindspore/_extends/graph_kernel/splitter.py +1 -9
  36. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
  37. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
  38. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  39. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
  40. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
  41. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  42. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  43. mindspore/_extends/parse/__init__.py +19 -17
  44. mindspore/_extends/parse/namespace.py +7 -36
  45. mindspore/_extends/parse/parser.py +375 -189
  46. mindspore/_extends/parse/resources.py +36 -41
  47. mindspore/_extends/parse/standard_method.py +350 -245
  48. mindspore/_extends/parse/trope.py +2 -12
  49. mindspore/_extends/remote/kernel_build_server.py +24 -7
  50. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  51. mindspore/_install_custom.py +43 -0
  52. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  53. mindspore/amp.py +85 -19
  54. mindspore/bin/cache_admin +0 -0
  55. mindspore/bin/cache_server +0 -0
  56. mindspore/boost/base.py +2 -2
  57. mindspore/boost/boost.py +27 -32
  58. mindspore/boost/boost_cell_wrapper.py +37 -13
  59. mindspore/boost/grad_accumulation.py +1 -1
  60. mindspore/boost/grad_freeze.py +34 -6
  61. mindspore/boost/group_loss_scale_manager.py +15 -14
  62. mindspore/boost/less_batch_normalization.py +28 -3
  63. mindspore/common/__init__.py +15 -11
  64. mindspore/common/_auto_dynamic.py +68 -0
  65. mindspore/common/_jit_fallback_utils.py +111 -0
  66. mindspore/common/_register_for_adapter.py +17 -5
  67. mindspore/common/_register_for_tensor.py +2 -2
  68. mindspore/common/_stub_tensor.py +18 -15
  69. mindspore/common/_utils.py +31 -7
  70. mindspore/common/api.py +269 -101
  71. mindspore/common/auto_dynamic_shape.py +498 -0
  72. mindspore/common/dtype.py +61 -21
  73. mindspore/common/dump.py +9 -7
  74. mindspore/common/initializer.py +106 -76
  75. mindspore/common/jit_config.py +35 -14
  76. mindspore/common/lazy_inline.py +187 -0
  77. mindspore/common/mindir_util.py +101 -0
  78. mindspore/common/mutable.py +10 -13
  79. mindspore/common/parameter.py +246 -55
  80. mindspore/common/seed.py +13 -7
  81. mindspore/common/sparse_tensor.py +29 -33
  82. mindspore/common/tensor.py +907 -251
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +84 -4
  85. mindspore/communication/management.py +160 -88
  86. mindspore/config/op_info.config +99 -75
  87. mindspore/config/super_bar_config.json +36 -4
  88. mindspore/context.py +526 -219
  89. mindspore/dataset/__init__.py +9 -46
  90. mindspore/dataset/audio/__init__.py +4 -19
  91. mindspore/dataset/audio/transforms.py +545 -233
  92. mindspore/dataset/audio/utils.py +21 -18
  93. mindspore/dataset/callback/ds_callback.py +42 -13
  94. mindspore/dataset/core/config.py +158 -100
  95. mindspore/dataset/core/validator_helpers.py +1 -63
  96. mindspore/dataset/debug/debug_hook.py +45 -13
  97. mindspore/dataset/debug/pre_defined_hook.py +5 -5
  98. mindspore/dataset/engine/__init__.py +0 -5
  99. mindspore/dataset/engine/cache_client.py +38 -15
  100. mindspore/dataset/engine/datasets.py +615 -278
  101. mindspore/dataset/engine/datasets_audio.py +154 -283
  102. mindspore/dataset/engine/datasets_standard_format.py +104 -116
  103. mindspore/dataset/engine/datasets_text.py +443 -326
  104. mindspore/dataset/engine/datasets_user_defined.py +251 -164
  105. mindspore/dataset/engine/datasets_vision.py +839 -1443
  106. mindspore/dataset/engine/iterators.py +11 -4
  107. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
  108. mindspore/dataset/engine/obs/util.py +3 -0
  109. mindspore/dataset/engine/offload.py +6 -6
  110. mindspore/dataset/engine/queue.py +15 -14
  111. mindspore/dataset/engine/samplers.py +39 -23
  112. mindspore/dataset/engine/serializer_deserializer.py +22 -6
  113. mindspore/dataset/engine/validators.py +21 -331
  114. mindspore/dataset/text/__init__.py +5 -33
  115. mindspore/dataset/text/transforms.py +334 -165
  116. mindspore/dataset/text/utils.py +215 -145
  117. mindspore/dataset/transforms/__init__.py +1 -1
  118. mindspore/dataset/transforms/c_transforms.py +3 -2
  119. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  120. mindspore/dataset/transforms/transforms.py +174 -71
  121. mindspore/dataset/utils/browse_dataset.py +25 -17
  122. mindspore/dataset/utils/line_reader.py +24 -21
  123. mindspore/dataset/vision/__init__.py +5 -26
  124. mindspore/dataset/vision/c_transforms.py +177 -165
  125. mindspore/dataset/vision/py_transforms.py +114 -119
  126. mindspore/dataset/vision/py_transforms_util.py +54 -51
  127. mindspore/dataset/vision/transforms.py +1127 -381
  128. mindspore/dataset/vision/utils.py +54 -38
  129. mindspore/dataset/vision/validators.py +12 -2
  130. mindspore/experimental/map_parameter.py +38 -4
  131. mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
  132. mindspore/experimental/optim/adam.py +192 -0
  133. mindspore/experimental/optim/adamw.py +181 -0
  134. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  135. mindspore/experimental/optim/optimizer.py +252 -0
  136. mindspore/experimental/optim/sgd.py +147 -0
  137. mindspore/gen_ops.py +273 -0
  138. mindspore/include/OWNERS +1 -2
  139. mindspore/include/api/context.h +21 -1
  140. mindspore/include/api/data_type.h +2 -1
  141. mindspore/include/api/graph.h +0 -15
  142. mindspore/include/api/kernel.h +2 -0
  143. mindspore/include/api/kernel_api.h +37 -12
  144. mindspore/include/api/model.h +29 -42
  145. mindspore/include/api/model_group.h +14 -3
  146. mindspore/include/api/model_parallel_runner.h +18 -2
  147. mindspore/include/api/serialization.h +26 -0
  148. mindspore/include/api/status.h +1 -0
  149. mindspore/include/api/types.h +38 -4
  150. mindspore/include/c_api/ms/abstract.h +67 -0
  151. mindspore/include/c_api/ms/attribute.h +197 -0
  152. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  153. mindspore/include/c_api/ms/base/macros.h +32 -0
  154. mindspore/include/c_api/ms/base/status.h +33 -0
  155. mindspore/include/c_api/ms/base/types.h +282 -0
  156. mindspore/include/c_api/ms/context.h +102 -0
  157. mindspore/include/c_api/ms/graph.h +160 -0
  158. mindspore/include/c_api/ms/node.h +606 -0
  159. mindspore/include/c_api/ms/tensor.h +161 -0
  160. mindspore/include/c_api/ms/value.h +84 -0
  161. mindspore/include/c_api/status_c.h +3 -0
  162. mindspore/include/dataset/constants.h +6 -12
  163. mindspore/include/dataset/execute.h +23 -13
  164. mindspore/include/dataset/text.h +26 -26
  165. mindspore/include/dataset/transforms.h +25 -31
  166. mindspore/include/dataset/vision.h +60 -60
  167. mindspore/include/dataset/vision_ascend.h +5 -6
  168. mindspore/include/dataset/vision_lite.h +17 -17
  169. mindspore/include/mindapi/base/format.h +0 -1
  170. mindspore/include/mindapi/base/type_id.h +2 -1
  171. mindspore/include/mindapi/base/types.h +5 -1
  172. mindspore/lib/libdnnl.so.2 +0 -0
  173. mindspore/lib/libjemalloc.so.2 +0 -0
  174. mindspore/lib/libmindspore.so +0 -0
  175. mindspore/lib/libmindspore_backend.so +0 -0
  176. mindspore/lib/libmindspore_common.so +0 -0
  177. mindspore/lib/libmindspore_core.so +0 -0
  178. mindspore/lib/libmindspore_glog.so.0 +0 -0
  179. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  180. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  181. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  182. mindspore/lib/libmindspore_shared_lib.so +0 -0
  183. mindspore/lib/libmpi_adapter.so +0 -0
  184. mindspore/lib/libnnacl.so +0 -0
  185. mindspore/lib/libopencv_core.so.4.5 +0 -0
  186. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  187. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  188. mindspore/lib/libps_cache.so +0 -0
  189. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
  192. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  193. mindspore/lib/plugin/ascend/libakg.so +0 -0
  194. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  195. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  196. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  197. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  198. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  199. mindspore/lib/plugin/cpu/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  201. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  202. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  203. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  204. mindspore/lib/plugin/gpu10.1/libnvidia_collective.so +0 -0
  205. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  206. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  207. mindspore/lib/plugin/gpu11.1/libnvidia_collective.so +0 -0
  208. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  209. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  210. mindspore/lib/plugin/gpu11.6/libnvidia_collective.so +0 -0
  211. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  212. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  213. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  214. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  215. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  216. mindspore/log.py +9 -6
  217. mindspore/mindrecord/filereader.py +33 -4
  218. mindspore/mindrecord/filewriter.py +70 -35
  219. mindspore/mindrecord/mindpage.py +40 -34
  220. mindspore/mindrecord/shardreader.py +1 -1
  221. mindspore/mindrecord/shardsegment.py +1 -1
  222. mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
  223. mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
  224. mindspore/mindrecord/tools/csv_to_mr.py +29 -13
  225. mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
  226. mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
  227. mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
  228. mindspore/nn/cell.py +463 -169
  229. mindspore/nn/dynamic_lr.py +47 -43
  230. mindspore/nn/layer/activation.py +225 -82
  231. mindspore/nn/layer/basic.py +121 -79
  232. mindspore/nn/layer/channel_shuffle.py +21 -21
  233. mindspore/nn/layer/combined.py +33 -26
  234. mindspore/nn/layer/container.py +277 -22
  235. mindspore/nn/layer/conv.py +441 -304
  236. mindspore/nn/layer/dense.py +19 -13
  237. mindspore/nn/layer/embedding.py +62 -49
  238. mindspore/nn/layer/flash_attention.py +264 -0
  239. mindspore/nn/layer/image.py +50 -39
  240. mindspore/nn/layer/math.py +62 -51
  241. mindspore/nn/layer/normalization.py +219 -167
  242. mindspore/nn/layer/padding.py +58 -70
  243. mindspore/nn/layer/pooling.py +334 -287
  244. mindspore/nn/layer/rnn_cells.py +53 -38
  245. mindspore/nn/layer/rnns.py +59 -56
  246. mindspore/nn/layer/thor_layer.py +52 -44
  247. mindspore/nn/layer/timedistributed.py +6 -4
  248. mindspore/nn/layer/transformer.py +284 -164
  249. mindspore/nn/learning_rate_schedule.py +34 -25
  250. mindspore/nn/loss/__init__.py +3 -2
  251. mindspore/nn/loss/loss.py +554 -311
  252. mindspore/nn/optim/ada_grad.py +12 -9
  253. mindspore/nn/optim/adadelta.py +14 -11
  254. mindspore/nn/optim/adafactor.py +19 -16
  255. mindspore/nn/optim/adam.py +62 -47
  256. mindspore/nn/optim/adamax.py +13 -10
  257. mindspore/nn/optim/adasum.py +12 -8
  258. mindspore/nn/optim/asgd.py +10 -9
  259. mindspore/nn/optim/ftrl.py +20 -17
  260. mindspore/nn/optim/lamb.py +16 -12
  261. mindspore/nn/optim/lars.py +8 -6
  262. mindspore/nn/optim/lazyadam.py +25 -20
  263. mindspore/nn/optim/momentum.py +10 -7
  264. mindspore/nn/optim/optimizer.py +61 -9
  265. mindspore/nn/optim/proximal_ada_grad.py +14 -13
  266. mindspore/nn/optim/rmsprop.py +17 -13
  267. mindspore/nn/optim/rprop.py +30 -17
  268. mindspore/nn/optim/sgd.py +40 -23
  269. mindspore/nn/optim/thor.py +24 -26
  270. mindspore/nn/probability/bijector/bijector.py +11 -11
  271. mindspore/nn/probability/bijector/exp.py +1 -1
  272. mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
  273. mindspore/nn/probability/bijector/invert.py +1 -1
  274. mindspore/nn/probability/bijector/power_transform.py +29 -29
  275. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  276. mindspore/nn/probability/bijector/softplus.py +5 -5
  277. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
  278. mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
  279. mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
  280. mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
  281. mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
  282. mindspore/nn/probability/distribution/_utils/utils.py +1 -1
  283. mindspore/nn/probability/distribution/bernoulli.py +9 -9
  284. mindspore/nn/probability/distribution/beta.py +8 -8
  285. mindspore/nn/probability/distribution/categorical.py +23 -15
  286. mindspore/nn/probability/distribution/cauchy.py +5 -6
  287. mindspore/nn/probability/distribution/distribution.py +3 -3
  288. mindspore/nn/probability/distribution/exponential.py +4 -4
  289. mindspore/nn/probability/distribution/gamma.py +10 -10
  290. mindspore/nn/probability/distribution/geometric.py +8 -8
  291. mindspore/nn/probability/distribution/gumbel.py +8 -9
  292. mindspore/nn/probability/distribution/half_normal.py +5 -5
  293. mindspore/nn/probability/distribution/laplace.py +5 -5
  294. mindspore/nn/probability/distribution/log_normal.py +12 -11
  295. mindspore/nn/probability/distribution/logistic.py +8 -8
  296. mindspore/nn/probability/distribution/normal.py +6 -5
  297. mindspore/nn/probability/distribution/poisson.py +10 -11
  298. mindspore/nn/probability/distribution/student_t.py +8 -9
  299. mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
  300. mindspore/nn/probability/distribution/uniform.py +11 -11
  301. mindspore/nn/reinforcement/tensor_array.py +2 -2
  302. mindspore/nn/sparse/sparse.py +9 -9
  303. mindspore/nn/wrap/cell_wrapper.py +188 -63
  304. mindspore/nn/wrap/grad_reducer.py +21 -12
  305. mindspore/nn/wrap/loss_scale.py +136 -49
  306. mindspore/numpy/__init__.py +4 -4
  307. mindspore/numpy/array_creations.py +55 -56
  308. mindspore/numpy/array_ops.py +134 -35
  309. mindspore/numpy/logic_ops.py +66 -20
  310. mindspore/numpy/math_ops.py +142 -139
  311. mindspore/numpy/utils_const.py +2 -2
  312. mindspore/offline_debug/convert_async.py +2 -2
  313. mindspore/ops/_grad_experimental/__init__.py +7 -5
  314. mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
  315. mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
  316. mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
  317. mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
  318. mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
  319. mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
  320. mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
  321. mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
  322. mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
  323. mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
  324. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
  325. mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
  326. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  327. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  328. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
  329. mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
  330. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
  331. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
  332. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
  333. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
  334. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  335. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
  336. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
  337. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
  338. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  339. mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
  340. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  341. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  342. mindspore/ops/_op_impl/aicpu/cast.py +52 -0
  343. mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
  344. mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
  345. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  346. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
  347. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  348. mindspore/ops/_op_impl/aicpu/eye.py +4 -4
  349. mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
  350. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
  351. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  352. mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
  353. mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
  354. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  355. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  356. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  357. mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
  358. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
  359. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  360. mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
  361. mindspore/ops/_op_impl/aicpu/median.py +1 -0
  362. mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
  363. mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
  364. mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
  365. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
  366. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  367. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  368. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  369. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  370. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  371. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
  372. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
  373. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
  374. mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
  375. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  376. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  377. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  378. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  379. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
  380. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
  381. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  382. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  383. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  384. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  385. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  386. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
  387. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
  388. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
  389. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
  390. mindspore/ops/_op_impl/tbe/__init__.py +6 -4
  391. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  392. mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
  393. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
  394. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
  395. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
  396. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
  397. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
  398. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  399. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
  400. mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
  401. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
  402. mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
  403. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
  404. mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
  405. mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
  406. mindspore/ops/_op_impl/tbe/im2col.py +4 -4
  407. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  408. mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
  409. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
  410. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
  411. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  412. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
  413. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  414. mindspore/ops/_primitive_cache.py +1 -1
  415. mindspore/ops/_tracefunc.py +241 -0
  416. mindspore/ops/_utils/utils.py +10 -2
  417. mindspore/ops/_vmap/vmap_array_ops.py +5 -3
  418. mindspore/ops/_vmap/vmap_base.py +5 -4
  419. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  420. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  421. mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
  422. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  423. mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
  424. mindspore/ops/arg_dtype_cast.py +54 -0
  425. mindspore/ops/composite/__init__.py +7 -5
  426. mindspore/ops/composite/base.py +78 -34
  427. mindspore/ops/composite/math_ops.py +5 -695
  428. mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
  429. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
  430. mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
  431. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  432. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  433. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
  434. mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
  435. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
  436. mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
  437. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
  438. mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
  439. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
  440. mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
  441. mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
  442. mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
  443. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
  444. mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
  445. mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
  446. mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
  447. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  448. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  449. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
  450. mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
  451. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
  452. mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
  453. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  454. mindspore/ops/deprecated.py +304 -0
  455. mindspore/ops/function/__init__.py +41 -4
  456. mindspore/ops/function/array_func.py +1108 -467
  457. mindspore/ops/function/clip_func.py +94 -27
  458. mindspore/ops/function/debug_func.py +3 -1
  459. mindspore/ops/function/grad/grad_func.py +82 -73
  460. mindspore/ops/function/image_func.py +28 -12
  461. mindspore/ops/function/linalg_func.py +135 -39
  462. mindspore/ops/function/math_func.py +3779 -894
  463. mindspore/ops/function/nn_func.py +1584 -657
  464. mindspore/ops/function/parameter_func.py +13 -3
  465. mindspore/ops/function/random_func.py +247 -153
  466. mindspore/ops/function/sparse_func.py +14 -11
  467. mindspore/ops/function/sparse_unary_func.py +173 -47
  468. mindspore/ops/function/spectral_func.py +8 -4
  469. mindspore/ops/function/vmap_func.py +8 -7
  470. mindspore/ops/functional.py +47 -16
  471. mindspore/ops/op_info_register.py +346 -86
  472. mindspore/ops/operations/__init__.py +38 -22
  473. mindspore/ops/operations/_grad_ops.py +145 -149
  474. mindspore/ops/operations/_inner_ops.py +298 -56
  475. mindspore/ops/operations/_ms_kernel.py +3 -3
  476. mindspore/ops/operations/_quant_ops.py +24 -28
  477. mindspore/ops/operations/_rl_inner_ops.py +9 -7
  478. mindspore/ops/operations/_scalar_ops.py +115 -0
  479. mindspore/ops/operations/_sequence_ops.py +148 -10
  480. mindspore/ops/operations/_tensor_array.py +1 -1
  481. mindspore/ops/operations/_thor_ops.py +2 -2
  482. mindspore/ops/operations/array_ops.py +1239 -561
  483. mindspore/ops/operations/comm_ops.py +166 -90
  484. mindspore/ops/operations/control_ops.py +3 -3
  485. mindspore/ops/operations/custom_ops.py +124 -102
  486. mindspore/ops/operations/debug_ops.py +24 -11
  487. mindspore/ops/operations/image_ops.py +86 -71
  488. mindspore/ops/operations/inner_ops.py +18 -13
  489. mindspore/ops/operations/linalg_ops.py +30 -11
  490. mindspore/ops/operations/math_ops.py +1730 -435
  491. mindspore/ops/operations/nn_ops.py +1953 -943
  492. mindspore/ops/operations/other_ops.py +65 -43
  493. mindspore/ops/operations/random_ops.py +258 -98
  494. mindspore/ops/operations/rl_ops.py +4 -36
  495. mindspore/ops/operations/sparse_ops.py +38 -33
  496. mindspore/ops/operations/spectral_ops.py +8 -4
  497. mindspore/ops/primitive.py +66 -44
  498. mindspore/ops/signature.py +5 -5
  499. mindspore/parallel/_auto_parallel_context.py +80 -19
  500. mindspore/parallel/_cost_model_context.py +42 -0
  501. mindspore/parallel/_offload_context.py +162 -72
  502. mindspore/parallel/_parallel_serialization.py +2 -2
  503. mindspore/parallel/_ps_context.py +16 -4
  504. mindspore/parallel/_recovery_context.py +2 -1
  505. mindspore/parallel/_tensor.py +15 -13
  506. mindspore/parallel/_transformer/layers.py +8 -6
  507. mindspore/parallel/_transformer/loss.py +1 -0
  508. mindspore/parallel/_transformer/moe.py +7 -7
  509. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  510. mindspore/parallel/_transformer/transformer.py +34 -14
  511. mindspore/parallel/_utils.py +36 -14
  512. mindspore/parallel/algo_parameter_config.py +114 -20
  513. mindspore/parallel/checkpoint_transform.py +16 -18
  514. mindspore/parallel/shard.py +16 -13
  515. mindspore/profiler/__init__.py +1 -1
  516. mindspore/profiler/common/struct_type.py +3 -3
  517. mindspore/profiler/common/util.py +3 -2
  518. mindspore/profiler/envprofiling.py +11 -4
  519. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  520. mindspore/profiler/parser/ascend_flops_generator.py +94 -0
  521. mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
  522. mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
  523. mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
  524. mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
  525. mindspore/profiler/parser/ascend_op_generator.py +276 -0
  526. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  527. mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
  528. mindspore/profiler/parser/base_timeline_generator.py +11 -7
  529. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
  530. mindspore/profiler/parser/flops_parser.py +15 -11
  531. mindspore/profiler/parser/framework_parser.py +92 -73
  532. mindspore/profiler/parser/hccl_parser.py +16 -12
  533. mindspore/profiler/parser/integrator.py +22 -11
  534. mindspore/profiler/parser/memory_usage_parser.py +36 -11
  535. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  536. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  537. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  538. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  539. mindspore/profiler/parser/optime_parser.py +1 -1
  540. mindspore/profiler/parser/profiler_info.py +4 -5
  541. mindspore/profiler/parser/step_trace_parser.py +11 -14
  542. mindspore/profiler/profiling.py +678 -377
  543. mindspore/rewrite/api/node.py +211 -54
  544. mindspore/rewrite/api/node_type.py +5 -0
  545. mindspore/rewrite/api/pattern_engine.py +22 -23
  546. mindspore/rewrite/api/scoped_value.py +20 -17
  547. mindspore/rewrite/api/symbol_tree.py +252 -106
  548. mindspore/rewrite/api/tree_node_helper.py +3 -0
  549. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  550. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  551. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  552. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
  553. mindspore/rewrite/common/rewrite_elog.py +5 -1
  554. mindspore/rewrite/namer.py +51 -51
  555. mindspore/rewrite/namespace.py +14 -5
  556. mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
  557. mindspore/rewrite/node/call_function.py +79 -0
  558. mindspore/rewrite/node/cell_container.py +135 -0
  559. mindspore/rewrite/node/control_flow.py +88 -0
  560. mindspore/rewrite/{node.py → node/node.py} +313 -247
  561. mindspore/rewrite/node/node_manager.py +254 -0
  562. mindspore/rewrite/node/node_topological_manager.py +243 -0
  563. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  564. mindspore/rewrite/parsers/assign_parser.py +225 -239
  565. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  566. mindspore/rewrite/parsers/class_def_parser.py +179 -218
  567. mindspore/rewrite/parsers/constant_parser.py +9 -6
  568. mindspore/rewrite/parsers/container_parser.py +9 -7
  569. mindspore/rewrite/parsers/for_parser.py +36 -15
  570. mindspore/rewrite/parsers/function_def_parser.py +23 -20
  571. mindspore/rewrite/parsers/if_parser.py +28 -24
  572. mindspore/rewrite/parsers/module_parser.py +202 -25
  573. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  574. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  575. mindspore/rewrite/parsers/return_parser.py +6 -6
  576. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  577. mindspore/rewrite/sparsify/sparsify.py +4 -1
  578. mindspore/rewrite/sparsify/utils.py +11 -5
  579. mindspore/rewrite/symbol_tree.py +577 -732
  580. mindspore/rewrite/symbol_tree_builder.py +9 -175
  581. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  582. mindspore/run_check/_check_version.py +46 -39
  583. mindspore/run_check/run_check.py +3 -2
  584. mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
  585. mindspore/safeguard/rewrite_obfuscation.py +517 -0
  586. mindspore/scipy/__init__.py +1 -1
  587. mindspore/scipy/linalg.py +67 -61
  588. mindspore/scipy/ops.py +5 -41
  589. mindspore/scipy/ops_grad.py +3 -2
  590. mindspore/scipy/ops_wrapper.py +5 -5
  591. mindspore/scipy/optimize/line_search.py +8 -8
  592. mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
  593. mindspore/scipy/optimize/minimize.py +16 -12
  594. mindspore/scipy/utils.py +1 -52
  595. mindspore/scipy/utils_const.py +4 -4
  596. mindspore/train/__init__.py +4 -4
  597. mindspore/train/_utils.py +13 -5
  598. mindspore/train/amp.py +410 -148
  599. mindspore/train/anf_ir_pb2.py +16 -4
  600. mindspore/train/callback/_backup_and_restore.py +8 -11
  601. mindspore/train/callback/_callback.py +80 -3
  602. mindspore/train/callback/_checkpoint.py +82 -51
  603. mindspore/train/callback/_early_stop.py +12 -15
  604. mindspore/train/callback/_history.py +1 -1
  605. mindspore/train/callback/_lambda_callback.py +13 -13
  606. mindspore/train/callback/_landscape.py +21 -17
  607. mindspore/train/callback/_loss_monitor.py +9 -10
  608. mindspore/train/callback/_on_request_exit.py +16 -33
  609. mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
  610. mindspore/train/callback/_summary_collector.py +44 -30
  611. mindspore/train/callback/_time_monitor.py +62 -12
  612. mindspore/train/data_sink.py +10 -16
  613. mindspore/train/dataset_helper.py +154 -86
  614. mindspore/train/loss_scale_manager.py +14 -9
  615. mindspore/train/metrics/__init__.py +10 -2
  616. mindspore/train/metrics/accuracy.py +1 -1
  617. mindspore/train/metrics/auc.py +1 -1
  618. mindspore/train/metrics/bleu_score.py +2 -2
  619. mindspore/train/metrics/confusion_matrix.py +14 -14
  620. mindspore/train/metrics/cosine_similarity.py +3 -3
  621. mindspore/train/metrics/dice.py +1 -1
  622. mindspore/train/metrics/fbeta.py +1 -1
  623. mindspore/train/metrics/hausdorff_distance.py +8 -6
  624. mindspore/train/metrics/mean_surface_distance.py +5 -4
  625. mindspore/train/metrics/metric.py +49 -17
  626. mindspore/train/metrics/occlusion_sensitivity.py +4 -4
  627. mindspore/train/metrics/perplexity.py +1 -1
  628. mindspore/train/metrics/precision.py +2 -2
  629. mindspore/train/metrics/recall.py +2 -3
  630. mindspore/train/metrics/roc.py +7 -7
  631. mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
  632. mindspore/train/metrics/topk.py +7 -4
  633. mindspore/train/mind_ir_pb2.py +193 -48
  634. mindspore/train/model.py +377 -133
  635. mindspore/train/serialization.py +697 -245
  636. mindspore/train/summary/_summary_adapter.py +5 -2
  637. mindspore/train/summary/_writer_pool.py +4 -3
  638. mindspore/train/summary/summary_record.py +25 -23
  639. mindspore/train/train_thor/convert_utils.py +39 -23
  640. mindspore/train/train_thor/dataset_helper.py +4 -3
  641. mindspore/train/train_thor/model_thor.py +8 -8
  642. mindspore/version.py +1 -1
  643. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
  644. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +647 -818
  645. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
  646. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  647. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  648. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  649. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  650. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  651. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  652. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  653. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  654. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  655. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  656. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  657. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  658. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  659. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  660. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  661. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  662. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  663. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  664. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  665. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  666. mindspore/_extends/graph_kernel/expander.py +0 -80
  667. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
  668. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  669. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  670. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  671. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  672. mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
  673. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  674. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  675. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  676. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  677. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  678. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  679. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  680. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  681. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  682. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  683. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  684. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  685. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  686. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  687. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  688. mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
  689. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  690. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  691. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  692. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  693. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  694. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  695. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  696. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  697. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  698. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  699. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  700. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  701. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  702. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  703. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  704. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  705. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  706. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  707. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  708. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  709. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  710. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  711. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  712. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  713. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  714. mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
  715. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  716. mindspore/_extends/parse/jit_fallback_modules.py +0 -51
  717. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  718. mindspore/dataset/engine/graphdata.py +0 -1586
  719. mindspore/include/api/net.h +0 -142
  720. mindspore/ops/_grad/grad_array_ops.py +0 -1347
  721. mindspore/ops/_grad/grad_clip_ops.py +0 -84
  722. mindspore/ops/_grad/grad_debug_ops.py +0 -68
  723. mindspore/ops/_grad/grad_inner_ops.py +0 -235
  724. mindspore/ops/_grad/grad_math_ops.py +0 -1684
  725. mindspore/ops/_grad/grad_nn_ops.py +0 -1529
  726. mindspore/ops/_grad/grad_other_ops.py +0 -89
  727. mindspore/ops/_grad/grad_sequence_ops.py +0 -296
  728. mindspore/ops/_grad/grad_sparse.py +0 -323
  729. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
  730. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
  731. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  732. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  733. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  734. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
  735. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
  736. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
  737. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
  738. mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
  739. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
  740. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
  741. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  742. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
  743. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  744. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
  745. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  746. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
  747. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
  748. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
  749. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  750. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  751. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
  752. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
  753. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
  754. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
  755. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
  756. mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
  757. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
  758. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
  759. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
  760. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  761. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
  762. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  763. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  764. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
  765. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
  766. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
  767. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  768. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  769. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  770. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
  771. mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
  772. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  773. mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
  774. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
  775. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
  776. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
  777. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
  778. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
  779. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
  780. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  781. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
  782. mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
  783. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
  784. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
  785. mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
  786. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  787. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
  788. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
  789. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
  790. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
  791. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
  792. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
  793. mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
  794. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  795. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
  796. mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
  797. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
  798. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
  799. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
  800. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
  801. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
  802. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
  803. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
  804. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
  805. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
  806. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
  807. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  808. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  809. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  810. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
  811. mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
  812. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  813. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  814. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
  815. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
  816. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
  817. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
  818. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  819. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  820. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  821. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
  822. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
  823. mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
  824. mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
  825. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
  826. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  827. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
  828. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
  829. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
  830. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
  831. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
  832. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
  833. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
  834. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
  835. mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
  836. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  837. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  838. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
  839. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
  840. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
  841. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  842. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
  843. mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
  844. mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
  845. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
  846. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  847. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
  848. mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
  849. mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
  850. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
  851. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  852. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
  853. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
  854. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  855. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
  856. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
  857. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  858. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  859. mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
  860. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
  861. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
  862. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
  863. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
  864. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  865. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
  866. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
  867. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
  868. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
  869. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  870. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  871. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
  872. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
  873. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
  874. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
  875. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
  876. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
  877. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
  878. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
  879. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  880. mindspore/rewrite/node_visitor.py +0 -44
  881. mindspore/rewrite/topological_manager.py +0 -203
  882. mindspore/scipy/sparse/linalg.py +0 -192
  883. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
  884. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
@@ -16,8 +16,8 @@
 """Defines nn operators with functional form."""
 from __future__ import absolute_import
 from math import pi, log
-import numpy as np

+from mindspore import context
 from mindspore import log as logger
 import mindspore.ops as ops
 from mindspore.ops.primitive import constexpr, _primexpr
@@ -27,7 +27,7 @@ from mindspore.ops.operations import nn_ops as NN_OPS
 from mindspore.ops.operations import _sequence_ops as seq
 import mindspore.common.dtype as mstype
 from mindspore.ops.function.math_func import logsumexp
-from mindspore.ops.function.random_func import _get_seed
+from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
 from mindspore.common.tensor import Tensor
 from mindspore._c_expression import Tensor as Tensor_
 from mindspore.ops._primitive_cache import _get_cache_prim
@@ -39,6 +39,8 @@ from mindspore.ops.operations.nn_ops import PadV3
 from mindspore.ops.operations.nn_ops import ChannelShuffle
 from mindspore.ops.operations.nn_ops import TripletMarginLoss
 from mindspore.ops.operations._inner_ops import SiLU
+from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
+from mindspore.common.api import _function_forbid_reuse

 slice_ = P.Slice()
 fast_gelu_ = P.FastGeLU()
@@ -47,12 +49,19 @@ hardswish_ = P.HSwish()
 mish_ = NN_OPS.Mish()
 selu_ = NN_OPS.SeLU()
 scalar_to_tensor_ = P.ScalarToTensor()
+list_to_tensor_ = ListToTensor()
+tuple_to_tensor_ = TupleToTensor()
+tensor_to_tuple_ = TensorToTuple()
+cast_ = P.Cast()
 sigmoid_ = NN_OPS.Sigmoid()
-check_positive_int_const = constexpr(validator.check_positive_int)
-check_positive_int_sequence_const = constexpr(validator.check_positive_int_sequence)
-check_positive_float_const = constexpr(validator.check_positive_float)
-check_positive_float_sequence_const = constexpr(validator.check_positive_float_sequence)
+check_positive_int_const = validator.check_positive_int
+check_positive_int_sequence_const = validator.check_positive_int_sequence
+check_positive_float_const = validator.check_positive_float
+check_positive_float_sequence_const = validator.check_positive_float_sequence
 check_bool_const = constexpr(validator.check_bool)
+check_int_const = validator.check_is_int
+check_non_negative_float_const = validator.check_non_negative_float
+check_string_const = constexpr(validator.check_string)


 def adaptive_avg_pool2d(input, output_size):
@@ -76,10 +85,13 @@ def adaptive_avg_pool2d(input, output_size):
         * (w_{end}- w_{start})}
         \end{align}

+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
     Args:
         input (Tensor): The input of adaptive_avg_pool2d, which is a 3D or 4D tensor,
             with float16, float32 or float64 data type.
-        output_size (Union[int, tuple]): The target output size. `ouput_size` can be a tuple :math:`(H, W)`,
+        output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
             or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
             If it is None, it means the output size is the same as the input size.

@@ -105,9 +117,12 @@ def adaptive_avg_pool2d(input, output_size):
         ValueError: If the dimension of `input` is less than or equal to the dimension of `output_size`.

     Supported Platforms:
-        ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``

     Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
         >>> # case 1: output_size=(None, 2)
         >>> input = Tensor(np.array([[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
         ...                           [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
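
For orientation, a minimal runnable sketch of the call documented above (not part of the diff itself; it assumes a MindSpore 2.2.0 install with a backend that supports `ops.adaptive_avg_pool2d`):

    import numpy as np
    from mindspore import Tensor, ops

    # 3-D input of shape (C, H, W) = (1, 3, 3)
    x = Tensor(np.arange(9, dtype=np.float32).reshape(1, 3, 3))
    # output_size=(None, 2): keep H as-is, adaptively average W down to 2
    y = ops.adaptive_avg_pool2d(x, (None, 2))
    print(y.shape)  # expected: (1, 3, 2)
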
@@ -168,7 +183,7 @@ def adaptive_avg_pool3d(input, output_size):

     Args:
         input (Tensor): The input of adaptive_avg_pool3d, which is a 5D or 4D Tensor.
-        output_size (Union[int, tuple]): The target output size. `ouput_size` can be a tuple :math:`(D, H, W)`,
+        output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(D, H, W)`,
             or an int D for :math:`(D, D, D)`. :math:`D`, :math:`H` and :math:`W` can be int or None
             which means the output size is the same as that of the input.

@@ -185,6 +200,9 @@ def adaptive_avg_pool3d(input, output_size):
         ``Ascend`` ``GPU`` ``CPU``

     Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
         >>> # case 1: output_size=(3, 3, 4)
         >>> output_size=(3, 3, 4)
         >>> input_val = np.random.randn(4, 3, 5, 6, 7)
@@ -215,7 +233,7 @@ def adaptive_avg_pool3d(input, output_size):
215
233
  def _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad):
216
234
  """Checks the type of avgpool1d input"""
217
235
  validator.check_value_type('kernel_size', kernel_size, [int], 'avg_pool1d')
218
- validator.check_value_type('stride', stride, [int], 'avg_pool1d')
236
+ validator.check_value_type('stride', stride, (int, tuple), 'avg_pool1d')
219
237
  validator.check_value_type('ceil_mode', ceil_mode, bool, 'avg_pool1d')
220
238
  validator.check_value_type('count_include_pad', count_include_pad, bool, 'avg_pool1d')
221
239
  validator.check_int(kernel_size, 1, validator.GE, "kernel_size", 'avg_pool1d')
@@ -245,15 +263,13 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
245
263
 
246
264
  Args:
247
265
  input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
248
- kernel_size (int): The size of kernel window used to take the average value. Default: 1.
249
- stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents the height and
250
- width of movement are both strides, or a tuple of two int numbers that represent height and width of
251
- movement respectively. Default: 1.
252
- padding (Union(int, tuple[int])): The pad value to be filled. If `padding` is an integer, the paddings of left
253
- and right are the same, equal to pad. If `padding` is a tuple of `2` integers, the padding of left and right
254
- equal to `padding[0]` and `padding[1]` correspondingly. Default: 0.
255
- ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: False.
256
- count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: True.
266
+ kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
267
+ stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
268
+ number or a tuple of one int number. Default: ``1`` .
269
+ padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
270
+ or a tuple of one integer. Default: ``0`` .
271
+ ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
272
+ count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
257
273
 
258
274
  Returns:
259
275
  Tensor of shape :math:`(N, C_{out}, L_{out})`.
@@ -271,6 +287,9 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
271
287
  ``Ascend`` ``GPU`` ``CPU``
272
288
 
273
289
  Examples:
290
+ >>> import mindspore
291
+ >>> import numpy as np
292
+ >>> from mindspore import Tensor, ops
274
293
  >>> input_x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
275
294
  >>> output = ops.avg_pool1d(input_x, kernel_size=6, stride=1)
276
295
  >>> print(output.shape)
@@ -280,20 +299,25 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
280
299
  raise TypeError("For avg_pool1d, the input input_x must be tensor")
281
300
 
282
301
  if len(input_x.shape) != 3:
283
- raise ValueError("For avg_pool1d, input must have 3 dim, but got {}.".format(len(input_x.shape)))
302
+ raise ValueError(f"For avg_pool1d, input must have 3 dim, but got {len(input_x.shape)}.")
284
303
 
285
304
  _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad)
286
305
  if isinstance(padding, int):
287
306
  check_non_negative_int(padding, 'padding', 'avg_pool1d')
288
307
  padding = (0, 0, 0, 0, padding, padding)
289
308
  elif isinstance(padding, tuple):
290
- if len(padding) != 2:
291
- raise ValueError("For avg_pool1d, padding should be int or tuple of length 2.")
309
+ if len(padding) != 1:
310
+ raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
292
311
  for item in padding:
293
312
  check_non_negative_int(item, 'padding', 'avg_pool1d')
294
- padding = (0, 0, 0, 0, padding[0], padding[1])
313
+ padding = (0, 0, 0, 0, padding[0], padding[0])
295
314
  else:
296
- raise TypeError("For avg_pool1d, padding should be int or tuple of length 2.")
315
+ raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")
316
+
317
+ if isinstance(stride, tuple):
318
+ if len(stride) != 1:
319
+ raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
320
+ stride = stride[0]
297
321
 
298
322
  expand_op = _get_cache_prim(P.ExpandDims)()
299
323
  squeeze_op = _get_cache_prim(P.Squeeze)((2, 3))
@@ -310,7 +334,7 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
310
334
  return input_x
311
335
 
312
336
 
313
- @constexpr
337
+ @_primexpr
314
338
  def _check_avgpool_2d_kernel_size(kernel_size):
315
339
  """check and calculate the avgpool2d kernel_size"""
316
340
  if isinstance(kernel_size, int):
@@ -327,7 +351,7 @@ def _check_avgpool_2d_kernel_size(kernel_size):
327
351
  return kernel_size
328
352
 
329
353
 
330
- @constexpr
354
+ @_primexpr
331
355
  def _check_avgpool_2d_stride(stride):
332
356
  """check and calculate the avgpool2d stride"""
333
357
  if isinstance(stride, int):
@@ -344,7 +368,7 @@ def _check_avgpool_2d_stride(stride):
344
368
  return stride
345
369
 
346
370
 
347
- @constexpr
371
+ @_primexpr
348
372
  def _check_avgpool_2d_padding(padding):
349
373
  """check and calculate the avgpool2d padding"""
350
374
  if isinstance(padding, int):
@@ -361,7 +385,7 @@ def _check_avgpool_2d_padding(padding):
361
385
  return padding
362
386
 
363
387
 
364
- @constexpr
388
+ @_primexpr
365
389
  def _check_avg_pool2d_type_and_value(ceil_mode, count_include_pad, divisor_override):
366
390
  """check the type of avgpool2d input"""
367
391
  validator.check_value_type('ceil_mode', ceil_mode, bool, 'avg_pool2d')
@@ -388,18 +412,18 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
388
412
  input_x (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
389
413
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value. It is an int number
390
414
  that represents height and width of the kernel, or a tuple of two int numbers that represent height and
391
- width respectively. Default: 1.
415
+ width respectively. Default: ``1`` .
392
416
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents the height and
393
417
  width of movement are both strides, or a tuple of two int numbers that represent height and width of
394
- movement respectively. Default: 1.
418
+ movement respectively. Default: ``1`` .
395
419
  padding (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `padding` is an integer, the
396
420
  paddings of top, bottom, left and right are the same, equal to pad. If `padding` is a tuple of `4` integers,
397
421
  the padding of top, bottom, left and right equal to `padding[0]`, `padding[1]`, `padding[2]` and
398
- `padding[3]` correspondingly. Default: 0.
399
- ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: False.
400
- count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: True.
422
+ `padding[3]` correspondingly. Default: ``0`` .
423
+ ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
424
+ count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
401
425
  divisor_override (int): If specified, it will be used as divisor in the averaging calculation, otherwise
402
- `kernel_size` will be used. Default: 0.
426
+ `kernel_size` will be used. Default: ``0``, which means not specified.
403
427
 
404
428
  Returns:
405
429
  Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
@@ -419,6 +443,9 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
419
443
  ``Ascend`` ``GPU`` ``CPU``
420
444
 
421
445
  Examples:
446
+ >>> import mindspore
447
+ >>> import numpy as np
448
+ >>> from mindspore import Tensor, ops
422
449
  >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mindspore.float32)
423
450
  >>> output = ops.avg_pool2d(x, kernel_size=2, stride=1)
424
451
  >>> print(output)
@@ -433,7 +460,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
433
460
  raise TypeError("For avg_pool2d, the input input_x must be tensor")
434
461
 
435
462
  if len(input_x.shape) != 4:
436
- raise ValueError("For avg_pool2d, input must have 4 dim, but got {}.".format(len(input_x.shape)))
463
+ raise ValueError(f"For avg_pool2d, input must have 4 dim, but got {len(input_x.shape)}.")
437
464
 
438
465
  kernel_size = _check_avgpool_2d_kernel_size(kernel_size)
439
466
  stride = _check_avgpool_2d_stride(stride)
@@ -491,18 +518,20 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
491
518
  float32 data type.
492
519
  kernel_size (Union[int, tuple[int]], optional): The size of kernel used to take the average value, is an int
493
520
  number that represents depth, height and width are both `kernel_size`, or a tuple of three int numbers that
494
- represent depth, height and width respectively. Default: 1.
521
+ represent depth, height and width respectively. Default: ``1`` .
495
522
  stride (Union[int, tuple[int]], optional): The distance of kernel moving, an int number that represents the
496
523
  depth, height and width of movement are both stride, or a tuple of three int numbers that represent depth,
497
- height and width of movement respectively. Default: 1.
524
+ height and width of movement respectively. Default: ``1`` .
498
525
  padding (Union(int, tuple[int]), optional): The pad value to be filled. If `padding` is an integer, the paddings
499
526
  of head, tail, top, bottom, left and right are the same, equal to pad. If `padding` is a tuple of six
500
527
  integers, the padding of head, tail, top, bottom, left and right equal to padding[0], padding[1],
501
- padding[2], padding[3], padding[4] and padding[5] correspondingly. Default: 0
502
- ceil_mode (bool, optional): If True, ceil instead of floor to compute the output shape. Default: False.
503
- count_include_pad (bool, optional): If True, averaging calculation will include the zero-padding. Default: True.
528
+ padding[2], padding[3], padding[4] and padding[5] correspondingly. Default: ``0`` .
529
+ ceil_mode (bool, optional): If ``True`` , apply ceil instead of floor to
530
+ compute the output shape. Default: ``False`` .
531
+ count_include_pad (bool, optional): If ``True`` , averaging calculation
532
+ will include the zero-padding. Default: ``True`` .
504
533
  divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
505
- otherwise `kernel_size` will be used. Default: 0.
534
+ otherwise `kernel_size` will be used. Default: ``0`` , which means not specified.
506
535
 
507
536
  Returns:
508
537
  Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type with `input_x`.
@@ -522,6 +551,9 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
522
551
  ``Ascend`` ``GPU`` ``CPU``
523
552
 
524
553
  Examples:
554
+ >>> import mindspore
555
+ >>> import numpy as np
556
+ >>> from mindspore import Tensor, ops
525
557
  >>> input_x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float16)
526
558
  >>> output = ops.avg_pool3d(input_x, kernel_size=2, stride=1)
527
559
  >>> print(output)
@@ -532,7 +564,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
532
564
  raise TypeError("For avg_pool3d, the input input_x must be tensor")
533
565
 
534
566
  if len(input_x.shape) != 5:
535
- raise ValueError("For avg_pool3d, input must have 5 dim, but got {}.".format(len(input_x.shape)))
567
+ raise ValueError(f"For avg_pool3d, input must have 5 dim, but got {len(input_x.shape)}.")
536
568
 
537
569
  _check_avg_pool3d_padding(padding)
538
570
 
@@ -547,6 +579,12 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
547
579
 
548
580
 
549
581
  @constexpr
582
+ def is_ascend_backend():
583
+ """Check if the Ascend is used"""
584
+ return context.get_context('device_target') == 'Ascend'
585
+
586
+
587
+ @_primexpr
550
588
  def _check_adaptive_max_pool1d_output_size(output_size):
551
589
  """Check the output_size value in adaptive_max_pool1d op."""
552
590
  validator.check_int(output_size, 1, validator.GE, "output_size", 'adaptive_max_pool1d')
@@ -563,7 +601,8 @@ def adaptive_max_pool1d(input, output_size):
563
601
  shape :math:`(N, C, L_{out})`, where :math:`L_{out}` is defined by `output_size`.
564
602
 
565
603
  Note:
566
- :math:`L_{in}` must be divisible by `output_size`.
604
+ - :math:`L_{in}` must be divisible by `output_size`.
605
+ - Ascend platform only supports float16 type for input.
567
606
 
568
607
  Args:
569
608
  input (Tensor): Tensor of shape :math:`(N, C, L_{in})`, with float16 or float32 data type.
@@ -585,6 +624,9 @@ def adaptive_max_pool1d(input, output_size):
585
624
  ``Ascend`` ``GPU`` ``CPU``
586
625
 
587
626
  Examples:
627
+ >>> import mindspore
628
+ >>> import numpy as np
629
+ >>> from mindspore import Tensor, ops
588
630
  >>> input = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
589
631
  >>> output = ops.adaptive_max_pool1d(input, output_size=2)
590
632
  >>> print(output.shape)
@@ -599,16 +641,21 @@ def adaptive_max_pool1d(input, output_size):
599
641
  x_dtype = _get_cache_prim(P.DType)()(input)
600
642
 
601
643
  if len(x_in_shape) != 3:
602
- raise ValueError("For adaptive_max_pool1d input must have 3 dim, but got {}.".format(len(x_in_shape)))
644
+ raise ValueError(f"For adaptive_max_pool1d input must have 3 dim, but got {len(x_in_shape)}.")
603
645
  if x_in_shape[2] < output_size:
604
- raise ValueError("For adaptive_max_pool1d input's last dimension must be greater or equal to "
605
- "output size {}, but got {}.".format(output_size, x_in_shape[2]))
646
+ raise ValueError(f"For adaptive_max_pool1d input's last dimension must be greater or equal to "
647
+ f"output size {output_size}, but got {x_in_shape[2]}.")
606
648
  if x_in_shape[2] % output_size != 0:
607
- raise ValueError("For adaptive_max_pool1d input's last dimension must be divisible by "
608
- "output size {}, but got {}.".format(output_size, x_in_shape[2]))
609
- if x_dtype not in [mstype.float16, mstype.float32]:
610
- raise TypeError("For adaptive_max_pool1d, the input dtype must be float16 or float32, "
611
- "but got {}.".format(x_dtype))
649
+ raise ValueError(f"For adaptive_max_pool1d input's last dimension must be divisible by "
650
+ f"output size {output_size}, but got {x_in_shape[2]}.")
651
+ if is_ascend_backend():
652
+ if x_dtype not in [mstype.float16]:
653
+ raise TypeError(f"For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
654
+ f"but got {x_dtype}.")
655
+ else:
656
+ if x_dtype not in [mstype.float16, mstype.float32]:
657
+ raise TypeError(f"For adaptive_max_pool1d, the input dtype must be float16 or float32, "
658
+ f"but got {x_dtype}.")
612
659
 
613
660
  expand_ = _get_cache_prim(P.ExpandDims)()
614
661
  squeeze_ = _get_cache_prim(P.Squeeze)(2)
@@ -616,11 +663,11 @@ def adaptive_max_pool1d(input, output_size):
616
663
  width = x_in_shape[2]
617
664
  stride = width // output_size
618
665
  kernel_size = width - (output_size - 1) * stride
619
-
620
- stride = (1, stride)
666
+ stride = (1, width // output_size)
621
667
  kernel_size = (1, kernel_size)
622
668
 
623
- max_pool_ = _get_cache_prim(P.MaxPool)(kernel_size=kernel_size, strides=stride)
669
+ max_pool_ = _get_cache_prim(NN_OPS.MaxPool)(kernel_size=kernel_size, strides=stride)
670
+
624
671
  input = expand_(input, 2)
625
672
  input = max_pool_(input)
626
673
  input = squeeze_(input)
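  # Quick arithmetic check (plain Python, hypothetical sizes) of the stride/kernel derivation
  # above: with stride = L_in // output_size and kernel = L_in - (output_size - 1) * stride,
  # a valid-padding MaxPool yields exactly `output_size` windows when L_in is divisible by output_size.
  # def _n_windows(length, kernel, stride):
  #     return (length - kernel) // stride + 1
  # for length, out in [(6, 2), (8, 4), (9, 3)]:
  #     s = length // out
  #     k = length - (out - 1) * s
  #     assert _n_windows(length, k, s) == out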
@@ -659,7 +706,7 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
659
706
  Args:
660
707
  input (Tensor): A 3D or 4D tensor,
661
708
  with float16, float32 or float64 data type.
662
- output_size (Union[int, tuple]): The target output size. `ouput_size` can be a tuple :math:`(H, W)`,
709
+ output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
663
710
  or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
664
711
  If it is None, it means the output size is the same as the input size.
665
712
 
@@ -681,6 +728,9 @@ def adaptive_max_pool2d(input, output_size, return_indices=False):
681
728
  ``Ascend`` ``GPU`` ``CPU``
682
729
 
683
730
  Examples:
731
+ >>> import mindspore
732
+ >>> import numpy as np
733
+ >>> from mindspore import Tensor, ops
684
734
  >>> # case 1: output_size=(None, 2)
685
735
  >>> input = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
686
736
  ... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
@@ -747,6 +797,8 @@ def adaptive_max_pool3d(input, output_size, return_indices=False):
747
797
  ``GPU`` ``CPU``
748
798
 
749
799
  Examples:
800
+ >>> import numpy as np
801
+ >>> from mindspore import Tensor, ops
750
802
  >>> input = Tensor(np.arange(0,36).reshape((1, 3, 3, 4)).astype(np.float32))
751
803
  >>> output_size = (1, 1, 2)
752
804
  >>> output = ops.adaptive_max_pool3d(input, output_size, True)
@@ -795,9 +847,10 @@ def max_unpool1d(x, indices, kernel_size, stride=None, padding=0, output_size=No
795
847
  Data type must be in int32 or int64.
796
848
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value.
797
849
  stride (Union[int, tuple[int]]): The distance of kernel moving,
798
- If stride is 0, (0) or None, then stride equal to kernel_size. Default: None.
799
- padding (Union[int, tuple[int]]): The pad value to be filled. Default: 0.
800
- output_size (tuple[int], optional): The output shape. Default: None.
850
+ If stride is 0, (0) or ``None`` , then stride is equal to kernel_size.
851
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
852
+ padding (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` .
853
+ output_size (tuple[int], optional): The output shape. Default: ``None`` .
801
854
  If output_size == (), then the shape of output computed by `kernel_size`, `stride` and `padding`.
802
855
  If output_size != (), then output_size must be :math:`(N, C, H)` , :math:`(C, H)` or
803
856
  :math:`(H)` and output_size must belong to
@@ -821,6 +874,8 @@ def max_unpool1d(x, indices, kernel_size, stride=None, padding=0, output_size=No
821
874
  ``Ascend`` ``GPU`` ``CPU``
822
875
 
823
876
  Examples:
877
+ >>> import numpy as np
878
+ >>> from mindspore import Tensor, ops
824
879
  >>> x = Tensor(np.array([[2, 4, 6, 8]]).astype(np.float32))
825
880
  >>> indices = Tensor(np.array([[1, 3, 5, 7]]).astype(np.int64))
826
881
  >>> output = ops.max_unpool1d(x, indices, kernel_size =2, stride=2, padding=0)
@@ -912,11 +967,11 @@ def max_unpool2d(x, indices, kernel_size, stride=None, padding=0, output_size=No
912
967
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
913
968
  the height and width of movement are both stride, or a tuple of two int numbers that
914
969
  represent height and width of movement respectively.
915
- If stride is None, then stride equal to kernel_size. Default: None.
916
- padding (Union[int, tuple[int]]): The pad value to be filled. Default: 0. If `padding` is an integer,
970
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
971
+ padding (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `padding` is an integer,
917
972
  the paddings of height and width are the same, equal to padding. If `padding` is a tuple of two
918
973
  integers, the padding of height and width equal to padding[0] and padding[1] correspondingly.
919
- output_size (tuple[int], optional): The target output size. Default: None.
974
+ output_size (tuple[int], optional): The target output size. Default: ``None`` .
920
975
  If output_size == (), then the shape of output computed by `kernel_size`, `stride` and `padding`.
921
976
  If output_size != (), then output_size must be :math:`(N, C, H, W)` , :math:`(C, H, W)` or :math:`(H, W)`
922
977
  and output_size must belong to
@@ -942,6 +997,8 @@ def max_unpool2d(x, indices, kernel_size, stride=None, padding=0, output_size=No
942
997
  ``Ascend`` ``GPU`` ``CPU``
943
998
 
944
999
  Examples:
1000
+ >>> import numpy as np
1001
+ >>> from mindspore import Tensor, ops
945
1002
  >>> x = Tensor(np.array([[[[0, 1], [8, 9]]]]).astype(np.float32))
946
1003
  >>> indices = Tensor(np.array([[[[0, 1], [2, 3]]]]).astype(np.int64))
947
1004
  >>> output = ops.max_unpool2d(x, indices, kernel_size=1, stride=1, padding=0)
@@ -1015,12 +1072,12 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1015
1072
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
1016
1073
  the depth, height and width of movement are both stride, or a tuple of three int numbers that
1017
1074
  represent depth, height and width of movement respectively.
1018
- If stride is None, then stride equal to kernel_size. Default: None.
1019
- padding (Union[int, tuple[int]]): The pad value to be filled. Default: 0. If `padding` is an integer,
1075
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
1076
+ padding (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `padding` is an integer,
1020
1077
  the paddings of depth, height and width are the same, equal to padding. If `padding` is a tuple of three
1021
1078
  integers, the padding of depth, height and width equal to padding[0], padding[1] and padding[2]
1022
1079
  correspondingly.
1023
- output_size (tuple[int], optional): The output size. Default: None. If output_size == (), then the shape of
1080
+ output_size (tuple[int], optional): The output size. Default: ``None`` . If output_size == (), then the shape of
1024
1081
  output computed by `kernel_size`, `stride` and `padding`. If output_size != (), then output_size must be
1025
1082
  :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` or :math:`(D, H, W)` and output_size must belong to
1026
1083
  :math:`[(N, C, D_{out} - stride[0], H_{out} - stride[1], W_{out} - stride[2]),
@@ -1045,6 +1102,8 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1045
1102
  ``Ascend`` ``GPU`` ``CPU``
1046
1103
 
1047
1104
  Examples:
1105
+ >>> import numpy as np
1106
+ >>> from mindspore import Tensor, ops
1048
1107
  >>> x = Tensor(np.array([[[[[0, 1], [8, 9]]]]]).astype(np.float32))
1049
1108
  >>> indices= Tensor(np.array([[[[[0, 1], [2, 3]]]]]).astype(np.int64))
1050
1109
  >>> output = ops.max_unpool3d(x, indices, kernel_size=2, stride=1, padding=0)
@@ -1092,7 +1151,7 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
1092
1151
  return out
1093
1152
 
1094
1153
 
1095
- def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
1154
+ def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None, reduction='mean'):
1096
1155
  r"""
1097
1156
  Adds sigmoid activation function to input `logits`, and uses the given logits to compute binary cross entropy
1098
1157
  between the logits and the label.
@@ -1122,7 +1181,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
1122
1181
 
1123
1182
  This operator will multiply the output by the corresponding weight.
1124
1183
  The tensor :math:`weight` assigns different weights to each piece of data in the batch,
1125
- and the tensor :math:`pos_weight` adds corresponding weights to the positive examples of each category.
1184
+ and the tensor :math:`pos\_weight` adds corresponding weights to the positive examples of each category.
1126
1185
 
1127
1186
  In addition, it can trade off recall and precision by adding weights to positive examples.
1128
1187
  In the case of multi-label classification the loss can be described as:
@@ -1141,15 +1200,21 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
1141
1200
  logits (Tensor): Input logits. Data type must be float16 or float32.
1142
1201
  label (Tensor): Ground truth label, has the same shape as `logits`.
1143
1202
  Data type must be float16 or float32.
1144
- weight (Tensor): A rescaling weight applied to the loss of each batch element. It can be
1203
+ weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. It can be
1145
1204
  broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
1146
- pos_weight (Tensor): A weight of positive examples. Must be a vector with length equal to the
1205
+ Default: ``None``, in which case `weight` is treated as a Tensor whose values are all ``1``.
1206
+ pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
1147
1207
  number of classes. It can be broadcast to a tensor with shape of `logits`.
1148
- Data type must be float16 or float32.
1149
- reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none',
1150
- not case sensitive. If 'none', do not perform reduction. Default: 'mean'.
1208
+ Data type must be float16 or float32. Default: ``None``, in which case `pos_weight` is treated as a Tensor whose values are all ``1``.
1209
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
1210
+ ``'sum'`` . Default: ``'mean'`` .
1211
+
1212
+ - ``'none'``: no reduction will be applied.
1213
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
1214
+ - ``'sum'``: the output elements will be summed.
1215
+
1151
1216
  Returns:
1152
- Tensor or Scalar, if `reduction` is 'none', it's a tensor with the same shape and type as input `logits`.
1217
+ Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
1153
1218
  Otherwise, the output is a scalar.
1154
1219
 
1155
1220
  Raises:
@@ -1157,12 +1222,15 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
1157
1222
  TypeError: If data type of input `logits`, `label`, `weight`, `pos_weight` is neither float16 nor float32.
1158
1223
  TypeError: If data type of input `reduction` is not string.
1159
1224
  ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
1160
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
1225
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
1161
1226
 
1162
1227
  Supported Platforms:
1163
1228
  ``Ascend`` ``GPU`` ``CPU``
1164
1229
 
1165
1230
  Examples:
1231
+ >>> import mindspore
1232
+ >>> import numpy as np
1233
+ >>> from mindspore import Tensor, ops
1166
1234
  >>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]), mindspore.float32)
1167
1235
  >>> label = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]), mindspore.float32)
1168
1236
  >>> weight = Tensor(np.array([1.0, 1.0, 1.0]), mindspore.float32)
@@ -1172,37 +1240,44 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
1172
1240
  0.3463612
1173
1241
  """
1174
1242
 
1243
+ if weight is None:
1244
+ weight = ops.ones_like(logits)
1245
+ if pos_weight is None:
1246
+ pos_weight = ops.ones_like(logits)
1175
1247
  bce_with_logits_loss_op = _get_cache_prim(NN_OPS.BCEWithLogitsLoss)(reduction)
1176
1248
  return bce_with_logits_loss_op(logits, label, weight, pos_weight)
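  # A NumPy reference sketch of the weighted formula documented above (an illustration, not
  # the BCEWithLogitsLoss kernel); with the tensors from the Examples block and default
  # weights it reproduces the documented value of about 0.3463612.
  # import numpy as np
  # def _bce_with_logits_ref(x, y, w=None, pw=None):
  #     w = np.ones_like(x) if w is None else w
  #     pw = np.ones_like(x) if pw is None else pw
  #     sig = 1.0 / (1.0 + np.exp(-x))
  #     loss = -w * (pw * y * np.log(sig) + (1.0 - y) * np.log(1.0 - sig))
  #     return loss.mean()
  # x = np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]])
  # y = np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]])
  # print(round(_bce_with_logits_ref(x, y), 7))  # 0.3463612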
1177
1249
 
1178
1250
 
1251
+ @_function_forbid_reuse
1179
1252
  def dropout(input, p=0.5, training=True, seed=None):
1180
- """
1253
+ r"""
1181
1254
  During training, randomly zeroes some of the elements of the input tensor
1182
- with probability `p` from a Bernoulli distribution. It plays the role of
1183
- reducing neuron correlation and avoid overfitting. The meaning of probability
1184
- here is opposite to that in `ops.Dropout` and `nn.Dropout`.
1255
+ with probability `p` from a Bernoulli distribution. It plays the role of reducing neuron correlation and
1256
+ avoiding overfitting. The output is multiplied by :math:`\frac{1}{1-p}` during training.
1257
+ During inference, this operation returns the same Tensor as `input`.
1185
1258
 
1186
1259
  Args:
1187
- input (Tensor): The input of Dropout, a Tensor of any shape with data type of float16 or float32.
1260
+ input (Tensor): The input Tensor of shape :math:`(*, N)`, with data type of float16, float32 or float64.
1188
1261
  p (float, optional): The dropping rate, between 0 and 1, e.g. p = 0.1,
1189
- means dropping out 10% of input units. Default: 0.5.
1190
- training (bool): Apply dropout if is True. Default: True.
1262
+ means dropping out 10% of input units. Default: ``0.5`` .
1263
+ training (bool): Apply dropout if is True. Default: ``True``.
1191
1264
  seed (int, optional): Seed is used as entropy source for Random number engines generating pseudo-random numbers.
1192
- Default: None, which will be treated as 0.
1265
+ Default: ``None`` , which will be treated as ``0`` .
1193
1266
 
1194
1267
  Returns:
1195
1268
  - **output** (Tensor) - Zeroed tensor, with the same shape and data type as `input`.
1196
1269
 
1197
1270
  Raises:
1198
1271
  TypeError: If `p` is not a float.
1199
- TypeError: If dtype of `input` is neither float16 nor float32.
1272
+ TypeError: If dtype of `input` is not float16, float32 or float64.
1200
1273
  TypeError: If `input` is not a Tensor.
1201
1274
 
1202
1275
  Supported Platforms:
1203
1276
  ``Ascend`` ``GPU`` ``CPU``
1204
1277
 
1205
1278
  Examples:
1279
+ >>> import mindspore
1280
+ >>> from mindspore import Tensor, ops
1206
1281
  >>> input = Tensor(((20, 16), (50, 50)), mindspore.float32)
1207
1282
  >>> output = ops.dropout(input, p=0.5)
1208
1283
  >>> print(output.shape)
@@ -1213,7 +1288,9 @@ def dropout(input, p=0.5, training=True, seed=None):
1213
1288
  return input
1214
1289
  keep_prob = 1 - p
1215
1290
  seed0, seed1 = _get_seed(seed, "dropout")
1216
- out, _ = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)(input)
1291
+ dropout_op = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)
1292
+ dropout_op = _set_prim_op_user_data(dropout_op, "random_cache", False)
1293
+ out, _ = dropout_op(input)
1217
1294
  return out
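  # A NumPy sketch (illustrative only) of the inverted-dropout behaviour described in the
  # docstring: elements are zeroed with probability p and the survivors are scaled by
  # 1 / (1 - p), so the expected value of the output matches the input during training.
  # import numpy as np
  # def _dropout_ref(x, p=0.5, training=True, seed=0):
  #     if not training or p == 0.0:
  #         return x
  #     mask = np.random.default_rng(seed).random(x.shape) >= p  # keep with probability 1 - p
  #     return x * mask / (1.0 - p)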
1218
1295
 
1219
1296
 
@@ -1228,6 +1305,9 @@ def celu(x, alpha=1.0):
1228
1305
 
1229
1306
  For more details, please refer to `celu <https://arxiv.org/abs/1704.07483>`_.
1230
1307
 
1308
+ .. warning::
1309
+ This is an experimental API that is subject to change or deletion.
1310
+
1231
1311
  Args:
1232
1312
  x (Tensor): The input of celu with data type of float16 or float32.
1233
1313
  alpha (float, optional): The :math:`\alpha` value for the Celu formulation. Default: 1.0
@@ -1245,6 +1325,9 @@ def celu(x, alpha=1.0):
1245
1325
  ``Ascend`` ``GPU`` ``CPU``
1246
1326
 
1247
1327
  Examples:
1328
+ >>> import mindspore
1329
+ >>> import numpy as np
1330
+ >>> from mindspore import Tensor, ops
1248
1331
  >>> x = Tensor(np.array([-2.0, -1.0, 1.0, 2.0]), mindspore.float32)
1249
1332
  >>> output = ops.celu(x, alpha=1.0)
1250
1333
  >>> print(output)
@@ -1278,8 +1361,8 @@ def dropout1d(input, p=0.5, training=True):
1278
1361
  number of channels, `L` is the feature length. The data type must be int8, int16, int32, int64, float16,
1279
1362
  float32 or float64.
1280
1363
  p (float, optional): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
1281
- which means an 80% chance of clearing. Default: 0.5.
1282
- training (bool, optional): Apply dropout if is True. Default: True.
1364
+ which means an 80% chance of clearing. Default: ``0.5`` .
1365
+ training (bool, optional): Apply dropout if is True. Default: ``True`` .
1283
1366
 
1284
1367
  Returns:
1285
1368
  Tensor, output, with the same shape and data type as `input`.
@@ -1294,6 +1377,9 @@ def dropout1d(input, p=0.5, training=True):
1294
1377
  ``Ascend`` ``GPU`` ``CPU``
1295
1378
 
1296
1379
  Examples:
1380
+ >>> import mindspore
1381
+ >>> import numpy as np
1382
+ >>> from mindspore import Tensor, ops
1297
1383
  >>> input_x = Tensor(np.random.randn(4, 3), mindspore.float32)
1298
1384
  >>> output = ops.dropout1d(input_x, 0.5)
1299
1385
  >>> print(output.shape)
@@ -1349,8 +1435,8 @@ def dropout2d(input, p=0.5, training=True):
1349
1435
  of channels, `H` is the feature height, and `W` is the feature width. The data type must be int8,
1350
1436
  int16, int32, int64, float16, float32 or float64.
1351
1437
  p (float): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
1352
- which means dropping out 80% of channels. Default: 0.5.
1353
- training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: True.
1438
+ which means dropping out 80% of channels. Default: ``0.5`` .
1439
+ training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: ``True`` .
1354
1440
 
1355
1441
  Returns:
1356
1442
  Tensor, output, with the same shape and data type as `input`.
@@ -1366,6 +1452,9 @@ def dropout2d(input, p=0.5, training=True):
1366
1452
  ``Ascend`` ``GPU`` ``CPU``
1367
1453
 
1368
1454
  Examples:
1455
+ >>> import mindspore
1456
+ >>> import numpy as np
1457
+ >>> from mindspore import Tensor, ops
1369
1458
  >>> input = Tensor(np.ones([2, 1, 2, 3]), mindspore.float32)
1370
1459
  >>> output = ops.dropout2d(input, 0.5)
1371
1460
  >>> print(output.shape)
@@ -1398,8 +1487,8 @@ def dropout3d(input, p=0.5, training=True):
1398
1487
  of channels, `D` is the feature depth, `H` is the feature height, and `W` is the feature width.
1399
1488
  The data type must be int8, int16, int32, int64, float16, float32 or float64.
1400
1489
  p (float): The dropping probability of a channel, between 0 and 1, e.g. `p` = 0.8,
1401
- which means dropping out 80% of channels. Default: 0.5.
1402
- training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: True.
1490
+ which means dropping out 80% of channels. Default: ``0.5`` .
1491
+ training(bool): If `training` is True, applying dropout, otherwise, not applying. Default: ``True`` .
1403
1492
 
1404
1493
  Returns:
1405
1494
  Tensor, output, with the same shape and data type as `input`.
@@ -1415,6 +1504,9 @@ def dropout3d(input, p=0.5, training=True):
1415
1504
  ``Ascend`` ``GPU`` ``CPU``
1416
1505
 
1417
1506
  Examples:
1507
+ >>> import mindspore
1508
+ >>> import numpy as np
1509
+ >>> from mindspore import Tensor, ops
1418
1510
  >>> input = Tensor(np.ones([2, 1, 2, 1, 2]), mindspore.float32)
1419
1511
  >>> output = ops.dropout3d(input, 0.5)
1420
1512
  >>> print(output.shape)
@@ -1452,6 +1544,9 @@ def fast_gelu(x):
1452
1544
  ``Ascend`` ``GPU`` ``CPU``
1453
1545
 
1454
1546
  Examples:
1547
+ >>> import mindspore
1548
+ >>> import numpy as np
1549
+ >>> from mindspore import Tensor, ops
1455
1550
  >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
1456
1551
  >>> output = ops.fast_gelu(x)
1457
1552
  >>> print(output)
@@ -1461,20 +1556,28 @@ def fast_gelu(x):
1461
1556
  return fast_gelu_(x)
1462
1557
 
1463
1558
 
1464
- @constexpr
1465
- def _check_float_range_inc_right(arg_value, lower_limit, upper_limit, arg_name=None, prim_name=None):
1559
+ @_primexpr
1560
+ def _check_float_range_inc_neither(arg_value, lower_limit, upper_limit, arg_name=None, prim_name=None):
1466
1561
  """
1467
- Method for checking whether input value is in float range inc right.
1562
+ Method for checking whether input value is in float range inc neither.
1468
1563
  """
1469
- return validator.check_float_range(arg_value, lower_limit, upper_limit, validator.INC_RIGHT, arg_name, prim_name)
1564
+ return validator.check_float_range(arg_value, lower_limit, upper_limit, validator.INC_NEITHER, arg_name, prim_name)
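+ # Plain-Python illustration (not the validator itself) of what "INC_NEITHER" enforces:
+ # the open interval (lower, upper), so for output_ratio both 0.0 and 1.0 are rejected.
+ # def _in_open_interval(value, lower, upper):
+ #     return lower < value < upper
+ # assert _in_open_interval(0.5, 0.0, 1.0)
+ # assert not _in_open_interval(1.0, 0.0, 1.0)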
1565
+
1566
+
1567
+ def _check_fractional_output_size_ratio(output_size, output_ratio, cls_name):
1568
+ """Internal function, used to check whether fractional_max_pool can specify the output shape."""
1569
+ if output_ratio is None and output_size is None:
1570
+ raise ValueError(f"For {cls_name}, 'output_size' and 'output_ratio' can not be None"
1571
+ f"at the same time, but got {output_ratio} and {output_size} .")
1470
1572
 
1471
1573
 
1472
1574
  def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=None, return_indices=False,
1473
1575
  _random_samples=None):
1474
1576
  r"""
1475
1577
  Applies the 2D FractionalMaxPool operation over `input`. The output Tensor shape can be determined by either
1476
- `output_size` or `output_ratio`, and the step size is determined by `_random_samples`.
1477
- `output_size` or `output_ratio` cannot be used at the same time.
1578
+ `output_size` or `output_ratio`, and the step size is determined by `_random_samples`. `output_size` will take
1579
+ effect when `output_size` and `output_ratio` are set at the same time.
1580
+ `output_size` and `output_ratio` cannot both be ``None`` at the same time.
1478
1581
 
1479
1582
  Refer to the paper `Fractional MaxPooling by Ben Graham <https://arxiv.org/abs/1412.6071>`_ for more details.
1480
1583
 
@@ -1489,16 +1592,17 @@ def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=Non
1489
1592
  is an int number that represents height and width, or a tuple
1490
1593
  of two int numbers that represent height and width respectively.
1491
1594
  The value must be a positive integer.
1492
- Default: None.
1595
+ Default: ``None``.
1493
1596
  output_ratio (Union[float, tuple[float]], optional): The ratio of target output shape to input shape.
1494
1597
  Specifying the size of the output tensor by using a ratio of the input size.
1495
1598
  Data type: float16, float32, double, and value is between (0, 1).
1496
- Default: None.
1497
- return_indices (bool, optional): Whether to return the indices of max value. Default: False.
1498
- _random_samples (Tensor, optional): The random step of FractionalMaxPool2d, which is a 3D tensor.
1499
- Tensor of data type: float16, float32, double, and value is between (0, 1).
1599
+ Default: ``None``.
1600
+ return_indices (bool, optional): Whether to return the indices of max value. Default: ``False``.
1601
+ _random_samples (Tensor, optional): The random step of fractional_max_pool2d, which is a 3D tensor.
1602
+ Tensor of data type: float16, float32, double, and value is between [0, 1).
1500
1603
  Supported shape :math:`(N, C, 2)` or :math:`(1, C, 2)`.
1501
- Default: None.
1604
+ Default: ``None``, the values of `_random_samples`
1605
+ will be drawn from a uniform distribution on the interval [0, 1).
1502
1606
 
1503
1607
  Returns:
1504
1608
  - **y** (Tensor) - Has the same type as the `input`.
@@ -1526,6 +1630,9 @@ def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=Non
1526
1630
  ``CPU``
1527
1631
 
1528
1632
  Examples:
1633
+ >>> import numpy as np
1634
+ >>> from mindspore import Tensor, ops
1635
+ >>> from mindspore import dtype as mstype
1529
1636
  >>> input = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
1530
1637
  ... 0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
1531
1638
  ... 0.9581, 0.4760, 0.9014, 0.8522, 0.3664,
@@ -1549,21 +1656,28 @@ def fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=Non
1549
1656
  [[[[ 1 9]
1550
1657
  [16 24]]]]
1551
1658
  """
1552
- if output_ratio is not None and output_size is not None or output_ratio is None and output_size is None:
1553
- raise ValueError(f"For fractional_max_pool2d, 'output_size' and 'output_ratio' can not be specified or None"
1554
- f"at the same time, but got {output_ratio} and {output_size} .")
1555
- if len(input.shape) == 3:
1659
+ _check_fractional_output_size_ratio(output_size, output_ratio, "fractional_max_pool2d")
1660
+ _check_value_type("return_indices", return_indices, [bool], "fractional_max_pool2d")
1661
+ dim_flag = False
1662
+ if input.ndim == 3:
1556
1663
  input = input.expand_dims(axis=0)
1664
+ dim_flag = True
1557
1665
  if _random_samples is None:
1558
- _random_samples = Tensor([[[0, 0]]], mstype.float32)
1559
- if output_ratio is not None:
1560
- if isinstance(output_ratio, float):
1666
+ if input.dtype in mstype.float_type:
1667
+ _random_samples = ops.rand(input.shape[0], input.shape[1], 2, dtype=input.dtype)
1668
+ else:
1669
+ _random_samples = ops.rand(input.shape[0], input.shape[1], 2)
1670
+ if output_size is None:
1671
+ if isinstance(output_ratio, (float, int)):
1672
+ _check_value_type("output_ratio", output_ratio, [float], "fractional_max_pool2d")
1561
1673
  output_ratio = (output_ratio, output_ratio)
1562
- _check_float_range_inc_right(output_ratio[0], 0.0, 1.0)
1563
- _check_float_range_inc_right(output_ratio[1], 0.0, 1.0)
1674
+ _check_float_range_inc_neither(output_ratio[0], 0.0, 1.0, "output_ratio[0]", "fractional_max_pool2d")
1675
+ _check_float_range_inc_neither(output_ratio[1], 0.0, 1.0, "output_ratio[1]", "fractional_max_pool2d")
1564
1676
  output_size = (int(input.shape[-2] * output_ratio[0]), int(input.shape[-1] * output_ratio[1]))
1565
1677
  fractional_max_pool = FractionalMaxPoolWithFixedKsize(kernel_size, output_size)
1566
1678
  output = fractional_max_pool(input, _random_samples)
1679
+ if dim_flag:
1680
+ output = output[0].squeeze(axis=0), output[1].squeeze(axis=0)
1567
1681
  if return_indices:
1568
1682
  return output
1569
1683
  return output[0]
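  # Sketch of the output-size derivation used above (hypothetical sizes, plain Python):
  # when only `output_ratio` is given, each spatial size is floor(input_size * ratio).
  # def _ratio_to_output_size(in_hw, ratio):
  #     ratio = (ratio, ratio) if isinstance(ratio, float) else ratio
  #     return int(in_hw[0] * ratio[0]), int(in_hw[1] * ratio[1])
  # assert _ratio_to_output_size((25, 25), 0.5) == (12, 12)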
@@ -1573,17 +1687,21 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1573
1687
  _random_samples=None):
1574
1688
  r"""
1575
1689
  Applies the 3D FractionalMaxPool operation over `input`. The output Tensor shape can be determined by either
1576
- `output_size` or `output_ratio`, and the step size is determined by `_random_samples`.
1577
- `output_size` or `output_ratio` cannot be used at the same time.
1690
+ `output_size` or `output_ratio`, and the step size is determined by `_random_samples`. `output_size` will take
1691
+ effect when `output_size` and `output_ratio` are set at the same time.
1692
+ `output_size` and `output_ratio` cannot both be ``None`` at the same time.
1578
1693
 
1579
1694
  Refer to the paper `Fractional MaxPooling by Ben Graham <https://arxiv.org/abs/1412.6071>`_ for more details.
1580
1695
 
1581
1696
  The input and output data format can be "NCDHW". N is the batch size, C is the number of channels,
1582
1697
  D the feature depth, H is the feature height, and W is the feature width.
1583
1698
 
1699
+ .. warning::
1700
+ This is an experimental API that is subject to change or deletion.
1701
+
1584
1702
  Args:
1585
1703
  input (Tensor): The input of FractionalMaxPool3d, which is a 4D or 5D tensor.
1586
- Tensor of data type: float16, float32, double, int32, int64.
1704
+ Tensor of data type: float16, float32, double.
1587
1705
  Supported shape :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
1588
1706
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
1589
1707
  is an int number that represents depth, height and width of the kernel, or a tuple
@@ -1593,15 +1711,16 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1593
1711
  is an int number that represents depth, height and width, or a tuple
1594
1712
  of three int numbers that represent depth, height and width respectively.
1595
1713
  The value must be a positive integer.
1596
- Default: None.
1714
+ Default: ``None`` .
1597
1715
  output_ratio (Union[float, tuple[float]], optional): The ratio of target output shape to input shape.
1598
1716
  Specifying the size of the output tensor by using a ratio of the input size.
1599
1717
  Data type: float16, float32, double, and value is between (0, 1).
1600
- Default: None.
1601
- return_indices (bool, optional): Whether to return the indices of max value. Default: False.
1602
- _random_samples (Tensor, optional): The random step of FractionalMaxPool3d, which is a 3D tensor.
1603
- Tensor of data type: float16, float32, double, and value is between (0, 1).
1604
- Supported shape :math:`(N, C, 3)` or :math:`(1, C, 3)` .
1718
+ Default: ``None`` .
1719
+ return_indices (bool, optional): Whether to return the indices of max value. Default: ``False`` .
1720
+ _random_samples (Tensor, optional): The random step of fractional_max_pool3d, which is a 3D tensor.
1721
+ Tensor of data type: float16, float32, double, and value is between [0, 1).
1722
+ Supported shape :math:`(N, C, 3)` or :math:`(1, C, 3)` . Default: ``None``, the values of `_random_samples`
1723
+ will be drawn from a uniform distribution on the interval [0, 1).
1605
1724
 
1606
1725
  Returns:
1607
1726
  - **y** (Tensor) - A tensor, the output of FractionalMaxPool3d.
@@ -1619,6 +1738,7 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1619
1738
  TypeError: If data type of `input` is not float16, float32, double, int32, int64.
1620
1739
  TypeError: If dtype of `_random_samples` is not float16, float32, double.
1621
1740
  TypeError: If dtype of `argmax` is not int32, int64.
1741
+ TypeError: If the dtype of `_random_samples` is different from that of `input`.
1622
1742
  ValueError: If `output_size` is a tuple and if `output_size` length is not 3.
1623
1743
  ValueError: If `kernel_size` is a tuple and if `kernel_size` length is not 3.
1624
1744
  ValueError: If numbers in `output_size` or `kernel_size` is not positive.
@@ -1631,35 +1751,47 @@ def fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=Non
1631
1751
  ``GPU`` ``CPU``
1632
1752
 
1633
1753
  Examples:
1754
+ >>> import numpy as np
1755
+ >>> from mindspore import Tensor, ops
1756
+ >>> from mindspore import dtype as mstype
1634
1757
  >>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
1635
1758
  ... .reshape([1, 1, 2, 2, 4]), mstype.float32)
1636
1759
  >>> _random_samples = Tensor(np.array([0.7, 0.7, 0.7]).reshape([1, 1, 3]), mstype.float32)
1637
- >>> output, argmax = ops.fractional_max_pool3d(x, kernel_size=(1.0, 1.0, 1.0), output_size=(1, 1, 3),
1760
+ >>> output, argmax = ops.fractional_max_pool3d(x, kernel_size=(1, 1, 1), output_size=(1, 1, 3),
1638
1761
  ... _random_samples=_random_samples, return_indices=True)
1639
1762
  >>> print(output)
1640
1763
  [[[[[13. 14. 16.]]]]]
1641
1764
  >>> print(argmax)
1642
1765
  [[[[[12 13 15]]]]]
1643
- >>> output, argmax = ops.fractional_max_pool3d(x, kernel_size=(1.0, 1.0, 1.0), output_ratio=(0.5, 0.5, 0.5),
1766
+ >>> output, argmax = ops.fractional_max_pool3d(x, kernel_size=(1, 1, 1), output_ratio=(0.5, 0.5, 0.5),
1644
1767
  ... _random_samples=_random_samples, return_indices=True)
1645
1768
  >>> print(output)
1646
1769
  [[[[[13. 16.]]]]]
1647
1770
  >>> print(argmax)
1648
1771
  [[[[[12 15]]]]]
1649
1772
  """
1650
- if output_ratio is not None and output_size is not None or output_ratio is None and output_size is None:
1651
- raise ValueError(f"For fractional_max_pool2d, 'output_size' and 'output_ratio' can not be specified or None"
1652
- f"at the same time, but got {output_ratio} and {output_size} .")
1773
+ _check_fractional_output_size_ratio(output_size, output_ratio, "fractional_max_pool3d")
1774
+ _check_value_type("return_indices", return_indices, [bool], "fractional_max_pool3d")
1653
1775
  if _random_samples is None:
1654
- _random_samples = Tensor([[[0, 0, 0]]], mstype.float32)
1655
- if output_ratio is not None:
1656
- if isinstance(output_ratio, float):
1776
+ n = 1 if input.ndim == 4 else input.shape[0]
1777
+ if input.dtype in mstype.float_type:
1778
+ _random_samples = ops.rand(n, input.shape[-4], 3, dtype=input.dtype)
1779
+ else:
1780
+ _random_samples = ops.rand(n, input.shape[-4], 3)
1781
+ if input.ndim == 4:
1782
+ _random_samples = _random_samples.transpose(1, 0, 2)
1783
+ if output_size is None:
1784
+ if isinstance(output_ratio, (float, int)):
1785
+ _check_value_type("output_ratio", output_ratio, [float], "fractional_max_pool3d")
1657
1786
  output_ratio = (output_ratio, output_ratio, output_ratio)
1658
- _check_float_range_inc_right(output_ratio[0], 0.0, 1.0)
1659
- _check_float_range_inc_right(output_ratio[1], 0.0, 1.0)
1660
- _check_float_range_inc_right(output_ratio[2], 0.0, 1.0)
1787
+ _check_float_range_inc_neither(output_ratio[0], 0.0, 1.0, "output_ratio[0]", "fractional_max_pool3d")
1788
+ _check_float_range_inc_neither(output_ratio[1], 0.0, 1.0, "output_ratio[1]", "fractional_max_pool3d")
1789
+ _check_float_range_inc_neither(output_ratio[2], 0.0, 1.0, "output_ratio[2]", "fractional_max_pool3d")
1661
1790
  output_size = (int(input.shape[-3] * output_ratio[0]), int(input.shape[-2] * output_ratio[1]),
1662
1791
  int(input.shape[-1] * output_ratio[2]))
1792
+ if input.dtype != _random_samples.dtype:
1793
+ raise TypeError(f"For 'fractional_max_pool3d', 'input' and '_random_samples' must be same dtype, "
1794
+ f"but got Tensor[{input.dtype}] and Tensor[{_random_samples.dtype}].")
1663
1795
  fractional_max_pool = FractionalMaxPool3DWithFixedKsize(kernel_size, output_size)
1664
1796
  output = fractional_max_pool(input, _random_samples)
1665
1797
  if return_indices:
@@ -1700,10 +1832,10 @@ def kl_div(logits, labels, reduction='mean'):
1700
1832
  logits (Tensor): The input Tensor. The data type must be float16, float32 or float64.
1701
1833
  labels (Tensor): The label Tensor which has the same shape and data type as `logits`.
1702
1834
  reduction (str): Specifies the reduction to be applied to the output.
1703
- Its value must be one of 'none', 'mean', 'batchmean' or 'sum'. Default: 'mean'.
1835
+ Its value must be one of ``'none'`` , ``'mean'`` , ``'batchmean'`` or ``'sum'`` . Default: ``'mean'`` .
1704
1836
 
1705
1837
  Returns:
1706
- Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `logits`.
1838
+ Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
1707
1839
  Otherwise, it is a scalar.
1708
1840
 
1709
1841
  Raises:
@@ -1715,6 +1847,9 @@ def kl_div(logits, labels, reduction='mean'):
1715
1847
  ``Ascend`` ``GPU`` ``CPU``
1716
1848
 
1717
1849
  Examples:
1850
+ >>> import mindspore
1851
+ >>> import numpy as np
1852
+ >>> from mindspore import Tensor, ops
1718
1853
  >>> logits = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32)
1719
1854
  >>> labels = Tensor(np.array([0., 1., 0.]), mindspore.float32)
1720
1855
  >>> output = mindspore.ops.kl_div(logits, labels, 'mean')
@@ -1727,13 +1862,13 @@ def kl_div(logits, labels, reduction='mean'):
1727
1862
 
1728
1863
  if reduction == 'batchmean':
1729
1864
  kl_div_sum = P.KLDivLoss(reduction='sum')(logits, labels)
1730
- shape = P.TensorShape()(logits)
1865
+ shape = P.Shape()(logits)
1731
1866
  batch_size = shape[0]
1732
1867
  return kl_div_sum / batch_size
1733
1868
 
1734
1869
  if reduction == 'mean':
1735
1870
  kl_div_sum = P.KLDivLoss(reduction='sum')(logits, labels)
1736
- shape = P.TensorShape()(logits)
1871
+ shape = P.Shape()(logits)
1737
1872
  total_size = 1
1738
1873
  for dim in shape:
1739
1874
  total_size = total_size * dim
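  # A small sketch (hypothetical values) of the reduction arithmetic above: starting from
  # the 'sum'-reduced loss, 'batchmean' divides by the batch dimension only, while 'mean'
  # divides by the total number of elements.
  # def _reduce_kl(kl_sum, shape, reduction):
  #     if reduction == 'batchmean':
  #         return kl_sum / shape[0]
  #     if reduction == 'mean':
  #         total = 1
  #         for dim in shape:
  #             total *= dim
  #         return kl_sum / total
  #     return kl_sum
  # assert _reduce_kl(12.0, (2, 3), 'batchmean') == 6.0
  # assert _reduce_kl(12.0, (2, 3), 'mean') == 2.0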
@@ -1758,7 +1893,7 @@ def hardshrink(x, lambd=0.5):
1758
1893
 
1759
1894
  Args:
1760
1895
  x (Tensor): The input of Hard Shrink with data type of float16 or float32.
1761
- lambd (float): The threshold :math:`\lambda` defined by the Hard Shrink formula. Default: 0.5.
1896
+ lambd (float): The threshold :math:`\lambda` defined by the Hard Shrink formula. Default: ``0.5`` .
1762
1897
 
1763
1898
  Returns:
1764
1899
  Tensor, has the same data type and shape as the input `x`.
@@ -1772,6 +1907,9 @@ def hardshrink(x, lambd=0.5):
1772
1907
  ``Ascend`` ``GPU`` ``CPU``
1773
1908
 
1774
1909
  Examples:
1910
+ >>> import mindspore
1911
+ >>> import numpy as np
1912
+ >>> from mindspore import Tensor, ops
1775
1913
  >>> x = Tensor(np.array([[ 0.5, 1, 2.0], [0.0533,0.0776,-2.1233]]), mindspore.float32)
1776
1914
  >>> output = ops.hardshrink(x)
1777
1915
  >>> print(output)
@@ -1949,7 +2087,7 @@ def is_floating_point(input):
1949
2087
  input (Tensor): The input Tensor.
1950
2088
 
1951
2089
  Returns:
1952
- Bool. If the dtype of `input` is a floating point data type, return True. Otherwise, return False.
2090
+ Bool. If the dtype of `input` is a floating point data type, return ``True`` . Otherwise, return ``False`` .
1953
2091
 
1954
2092
  Supported Platforms:
1955
2093
  ``Ascend`` ``GPU`` ``CPU``
@@ -1990,12 +2128,15 @@ def hardswish(x):
1990
2128
 
1991
2129
  Raises:
1992
2130
  TypeError: If `x` is not a Tensor.
1993
- TypeError: If dtype of `x` is not supported.
2131
+ TypeError: If dtype of `x` is not int or float.
1994
2132
 
1995
2133
  Supported Platforms:
1996
2134
  ``Ascend`` ``GPU`` ``CPU``
1997
2135
 
1998
2136
  Examples:
2137
+ >>> import mindspore
2138
+ >>> import numpy as np
2139
+ >>> from mindspore import Tensor, ops
1999
2140
  >>> x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
2000
2141
  >>> output = ops.hardswish(x)
2001
2142
  >>> print(output)
@@ -2004,12 +2145,79 @@ def hardswish(x):
2004
2145
  return hardswish_(x)
2005
2146
 
2006
2147
 
2148
+ def _is_dim_unknown(shape):
2149
+ return isinstance(shape, tuple) and -2 in shape
2150
+
2151
+
2152
+ @_primexpr
2153
+ def _interploate_make_tuple(rank, value):
2154
+ s = tuple_to_tensor_((rank,), mstype.int32)
2155
+ v = Tensor(value)
2156
+ t = _get_cache_prim(P.FillV2)()(s, v)
2157
+ out = tensor_to_tuple_(t)
2158
+ return out
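+ # In eager terms this helper is equivalent to building a tuple of `rank` copies of `value`;
+ # the FillV2/TensorToTuple route keeps it usable under graph mode (illustrative sketch only).
+ # def _make_tuple_ref(rank, value):
+ #     return (value,) * rank
+ # assert _make_tuple_ref(3, 2.0) == (2.0, 2.0, 2.0)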
2159
+
2160
+
2007
2161
  @_primexpr
2008
- def _scale_factor_convert_size(shape, scale_factor, dim):
2009
- return [int(float(shape[i + 2]) * scale_factor[i] // 1) for i in range(dim)]
2162
+ def _interpolate_scale_factor_convert_size(shape, scale_factor):
2163
+ x = tuple_to_tensor_(shape[2:], mstype.int64)
2164
+ y = tuple_to_tensor_(scale_factor, mstype.float32)
2165
+ t = x * y
2166
+ t = ops.TruncateDiv()(t, Tensor(1))
2167
+ t = ops.cast(t, mstype.int64)
2168
+ return tensor_to_tuple_(t)
2169
+
2170
+
2171
+ def _interpolate_size_check_with_rank(size, input_rank):
2172
+ if len(size) != input_rank - 2:
2173
+ raise ValueError(
2174
+ f"For 'interpolate', 'input' and 'size' must have the same spatial dimensions, "
2175
+ f"but got 'input' is {input_rank - 2}D, 'size' is {len(size)}D")
2176
+
2177
+
2178
+ def _interpolate_scale_factor_check_with_rank(scale_factor, input_rank):
2179
+ if len(scale_factor) != input_rank - 2:
2180
+ raise ValueError(
2181
+ f"For 'interpolate', 'input' and 'scale_factor' must have the same spatial dimensions, "
2182
+ f"but got 'input' is {input_rank - 2}D, 'scale_factor' is {len(scale_factor)}D"
2183
+ )
2010
2184
 
2011
2185
 
2012
- def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None, recompute_scale_factor=None):
2186
+ def _interpolate_mode_check(mode, supported_dict):
2187
+ if isinstance(mode, list) or mode not in supported_dict:
2188
+ raise ValueError(
2189
+ f"For 'interpolate', 'mode' must be in '{list(supported_dict)}', but got {mode}"
2190
+ )
2191
+
2192
+
2193
+ def _interpolate_rank_check(input_rank, mode, supported_dict):
2194
+ if input_rank not in supported_dict.get(mode):
2195
+ raise ValueError(
2196
+ f"For 'interpolate', {mode} only support '{list(supported_dict.get(mode, {}))}'D, but got {input_rank}D"
2197
+ )
2198
+
2199
+
2200
+ def _interpolate_scale_factor_check(scale_factor, mode, rank, supported_dict):
2201
+ if scale_factor is not None and "scale_factor" not in supported_dict.get(
2202
+ mode, {}).get(rank):
2203
+ raise ValueError(
2204
+ f"For 'interpolate', 'scale_factor' option cannot currently be set with the "
2205
+ f"mode = {mode} and dim = {rank}D.")
2206
+
2207
+
2208
+ def _interpolate_align_corners_mode_check(rank, mode, supported_dict):
2209
+ if "align_corners" not in supported_dict.get(mode, {}).get(rank):
2210
+ raise ValueError(
2211
+ f"For 'interpolate', 'align_corners' option cannot currently be set with the "
2212
+ f"mode = {mode}, and dim = {rank}D")
2213
+
2214
+
2215
+ def interpolate(input,
2216
+ size=None,
2217
+ scale_factor=None,
2218
+ mode="nearest",
2219
+ align_corners=None,
2220
+ recompute_scale_factor=None):
2013
2221
  r"""
2014
2222
  Samples the input Tensor to the given size or scale_factor by using one of the interpolate algorithms.
2015
2223
 
@@ -2018,16 +2226,21 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2018
2226
  Input tensor must be a 3-D, 4-D, or 5-D tensor with shape
2019
2227
  :math:`(N, C, [optional D], [optional H], W)` , with data type of float.
2020
2228
  size (Union[int, tuple[int], list[int]], optional): The target size.
2021
- If size is a tuple or list, size must have the same dimensions as input.
2022
- One and only one of size and scale_factor can be set to None. Default: None.
2229
+ If size is a tuple or list, its length should be the same as the number of dimensions in input
2230
+ after removing the first two dimensions N, C.
2231
+ One and only one of size and scale_factor can be set to None. Default: ``None`` .
2023
2232
  scale_factor (Union[float, tuple[float], list[float]], optional): The scale factor of new size of the tensor.
2024
- If size is a tuple or list, size must have the same dimensions as input.
2025
- One and only one of size and scale_factor can be set to None. Default: None.
2233
+ If scale_factor is a tuple or list, its length should be the same as the number of dimensions in input
2234
+ after removing the first two dimensions N, C.
2235
+ One and only one of size and scale_factor can be set to None. Default: ``None`` .
2026
2236
  mode (str): The sampling algorithm.
2027
- One of 'nearest'(3D and 4D), 'linear' (3D only), 'bilinear' (4D only), 'bicubic' (4D only),
2028
- 'area', 'nearest-exact'(3D and 4D). Default: 'nearest'.
2237
+ One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
2238
+ 'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
2239
+ known issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
2240
+
2029
2241
  align_corners (bool): If True, rescale input by :math:`(new\_height - 1) / (height - 1)`, which exactly
2030
2242
  aligns the corners of data and resized data. If False, rescale by :math:`new\_height / height`.
2243
+ Default: ``None`` .
2031
2244
 
2032
2245
  .. code-block::
2033
2246
 
@@ -2035,11 +2248,11 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2035
2248
 
2036
2249
  old_i = new_length > 1 ? (new_x + 0.5) * old_length / new_length - 0.5 : 0 # 'align_corners' = False
2037
2250
 
2038
- This is only valid for 'linear', 'bilinear', or 'bicubic' modes. Default: False.
2251
+ This is only valid for 'linear', 'bilinear', or 'bicubic' modes. Default: ``False`` .
2039
2252
  recompute_scale_factor (bool, optional): Recalculate `scale_factor`.
2040
2253
  If True, the parameter `size` will be calculated using the value of the `scale_factor`,
2041
2254
  and finally scaled using the value of `size`.
2042
- If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: None.
2255
+ If False, the value of `size` or `scale_factor` will be used for direct interpolation. Default: ``None`` .
2043
2256
 
2044
2257
  .. note::
2045
2258
  The 'nearest-exact' mode is the same as the nearest-neighbor interpolation algorithm used in
@@ -2055,22 +2268,26 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2055
2268
  +---------------+-----------+---------------+--------------+----------------+
2056
2269
  | | 4 | \- | × | Ascend,GPU,CPU |
2057
2270
  +---------------+-----------+---------------+--------------+----------------+
2058
- | linear | 3 | | × | GPU,CPU |
2271
+ | | 5 | \- | √ | Ascend,GPU,CPU |
2272
+ +---------------+-----------+---------------+--------------+----------------+
2273
+ | linear | 3 | √ | × | Ascend,GPU,CPU |
2059
2274
  +---------------+-----------+---------------+--------------+----------------+
2060
2275
  | bilinear | 4 | √ | × | Ascend,GPU,CPU |
2061
2276
  +---------------+-----------+---------------+--------------+----------------+
2062
- | bicubic | 4 | √ | × | GPU,CPU |
2277
+ | bicubic | 4 | √ | × | Ascend,GPU,CPU |
2063
2278
  +---------------+-----------+---------------+--------------+----------------+
2064
2279
  | area | 3 | \- | √ | Ascend,GPU,CPU |
2065
2280
  +---------------+-----------+---------------+--------------+----------------+
2066
- | | 4 | \- | √ | GPU |
2281
+ | | 4 | \- | √ | Ascend,GPU,CPU |
2067
2282
  +---------------+-----------+---------------+--------------+----------------+
2068
- | | 5 | \- | √ | GPU,CPU |
2283
+ | | 5 | \- | √ | Ascend,GPU,CPU |
2069
2284
  +---------------+-----------+---------------+--------------+----------------+
2070
2285
  | nearest-exact | 3 | \- | × | Ascend,CPU |
2071
2286
  +---------------+-----------+---------------+--------------+----------------+
2072
2287
  | | 4 | \- | × | Ascend,CPU |
2073
2288
  +---------------+-----------+---------------+--------------+----------------+
2289
+ | trilinear | 5 | √ | √ | Ascend,GPU,CPU |
2290
+ +---------------+-----------+---------------+--------------+----------------+
2074
2291
 
2075
2292
  - `-` indicates that there is no such parameter.
2076
2293
  - `×` indicates that this parameter is not currently supported.
@@ -2106,73 +2323,68 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2106
2323
 
2107
2324
  def run_nearest(x, size, align_corners=None, scale_factor=None):
2108
2325
  # 3D 4D use ResizeNearestNeighborV2, 5D use UpsampleNearest3D
2109
- if x.ndim == 3:
2110
- size = seq.TupleToTensor()((size[0], 1), mstype.int32)
2326
+ x_rank = F.rank(x)
2327
+ if size is not None and x_rank == 3:
2328
+ t1 = seq.TupleToTensor()(size[:1], mstype.int32)
2329
+ t2 = Tensor([1], mstype.int32)
2330
+ size = F.concat([t1, t2])
2111
2331
  x = x.unsqueeze(-1)
2112
- x = _get_cache_prim(P.ResizeNearestNeighborV2)(data_format="NCHW")(x, size)
2332
+ x = _get_cache_prim(P.ResizeNearestNeighborV2)()(
2333
+ x, size)
2113
2334
  x = P.Squeeze(-1)(x)
2114
- elif x.ndim == 4:
2115
- if isinstance(size, int):
2116
- size = F.scalar_to_tensor(size, mstype.int32)
2117
- elif isinstance(size, tuple):
2118
- size = seq.TupleToTensor()(size, mstype.int32)
2119
- else:
2120
- size = seq.ListToTensor()(size, mstype.int32)
2121
- x = _get_cache_prim(P.ResizeNearestNeighborV2)(data_format="NCHW")(x, size)
2335
+ elif size is not None and x_rank == 4:
2336
+ size = seq.TupleToTensor()(size[:2], mstype.int32)
2337
+ x = _get_cache_prim(P.ResizeNearestNeighborV2)()(
2338
+ x, size)
2122
2339
  else:
2123
- x = _get_cache_prim(P.UpsampleNearest3D)(size, scales=scale_factor)(x)
2340
+ x = _get_cache_prim(P.UpsampleNearest3D)()(x, size, scale_factor)
2124
2341
  return x
2125
2342
 
2126
2343
  def run_linear(x, size, align_corners=None, scale_factor=None):
2127
2344
  coordinate_transformation_mode = "align_corners" if align_corners else "half_pixel"
2128
- resize = _get_cache_prim(P.image_ops.ResizeLinear1D)(
2129
- coordinate_transformation_mode
2130
- )
2345
+ resize = _get_cache_prim(
2346
+ P.image_ops.ResizeLinear1D)(coordinate_transformation_mode)
2131
2347
  return resize(x, size)
2132
2348
 
2133
2349
  def run_bilinear(x, size, align_corners=None, scale_factor=None):
2134
- resize = _get_cache_prim(P.ResizeBilinearV2)(align_corners, not align_corners)
2350
+ resize = _get_cache_prim(P.ResizeBilinearV2)(align_corners,
2351
+ not align_corners)
2135
2352
  return resize(x, size)
2136
2353
 
2137
2354
  def run_trilinear(x, size, align_corners=None, scale_factor=None):
2138
- resize = _get_cache_prim(P.nn_ops.UpsampleTrilinear3D)(
2139
- output_size=size, scales=scale_factor, align_corners=align_corners
2140
- )
2141
- return resize(x)
2355
+ resize = _get_cache_prim(
2356
+ P.nn_ops.UpsampleTrilinear3D)(align_corners=align_corners)
2357
+ return resize(x, size, scale_factor)
2142
2358
 
2143
2359
  def run_bicubic(x, size, align_corners=None, scale_factor=None):
2144
2360
  resize = _get_cache_prim(P.image_ops.ResizeBicubic)(
2145
- align_corners=align_corners, half_pixel_centers=not align_corners
2146
- )
2147
- if isinstance(size, int):
2148
- size = F.scalar_to_tensor(size, mstype.int32)
2149
- elif isinstance(size, tuple):
2150
- size = seq.TupleToTensor()(size, mstype.int32)
2151
- else:
2152
- size = seq.ListToTensor()(size, mstype.int32)
2361
+ align_corners=align_corners, half_pixel_centers=not align_corners)
2362
+ size = seq.TupleToTensor()(size, mstype.int32)
2153
2363
  x = resize(x, size)
2154
2364
  return x
2155
2365
 
2156
2366
  def run_area(x, size, align_corners=None, scale_factor=None):
2157
- if x.ndim == 3:
2367
+ x_rank = F.rank(x)
2368
+ if x_rank == 3:
2158
2369
  x = ops.adaptive_avg_pool1d(x, size[0])
2159
- elif x.ndim == 4:
2370
+ elif x_rank == 4:
2160
2371
  x = ops.adaptive_avg_pool2d(x, tuple(size))
2161
2372
  else:
2162
2373
  x = ops.adaptive_avg_pool3d(x, tuple(size))
2163
2374
  return x
2164
2375
 
2165
2376
  def run_nearest_exact(x, size, align_corners=None, scale_factor=None):
2166
- if x.ndim == 3:
2377
+ x_rank = F.rank(x)
2378
+ if x_rank == 3:
2167
2379
  size = seq.TupleToTensor()((size[0], 1), mstype.int32)
2168
2380
  # For impl of nearest 3D use 4D.
2169
2381
  x = x.unsqueeze(-1)
2170
2382
  resize = _get_cache_prim(P.ResizeNearestNeighborV2)(
2171
- data_format="NCHW", align_corners=False, half_pixel_centers=True
2172
- )
2383
+ align_corners=False,
2384
+ half_pixel_centers=True)
2173
2385
  x = resize(x, size)
2174
2386
  x = P.Squeeze(-1)(x)
2175
- if x.ndim == 4:
2387
+ if x_rank == 4:
2176
2388
  if isinstance(size, int):
2177
2389
  size = F.scalar_to_tensor(size, mstype.int32)
2178
2390
  elif isinstance(size, tuple):
@@ -2180,18 +2392,41 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2180
2392
  else:
2181
2393
  size = seq.ListToTensor()(size, mstype.int32)
2182
2394
  resize = _get_cache_prim(P.ResizeNearestNeighborV2)(
2183
- data_format="NCHW", align_corners=False, half_pixel_centers=True
2184
- )
2395
+ align_corners=False,
2396
+ half_pixel_centers=True)
2185
2397
  x = resize(x, size)
2186
2398
  return x
2187
2399
 
2188
2400
  supported_dict = {
2189
- "nearest": {3: (), 4: ()},
2190
- "linear": {3: ("align_corners",)},
2191
- "bilinear": {4: ("align_corners",)},
2192
- "bicubic": {4: ("align_corners",)},
2193
- "area": {3: ("scale_factor",), 4: ("scale_factor",), 5: ("scale_factor",)},
2194
- "nearest-exact": {3: (), 4: ()},
2401
+ "nearest": {
2402
+ 3: (),
2403
+ 4: (),
2404
+ 5: ("scale_factor",)
2405
+ },
2406
+ "linear": {
2407
+ 3: ("align_corners",)
2408
+ },
2409
+ "bilinear": {
2410
+ 4: ("align_corners",)
2411
+ },
2412
+ "bicubic": {
2413
+ 4: ("align_corners",)
2414
+ },
2415
+ "area": {
2416
+ 3: ("scale_factor",),
2417
+ 4: ("scale_factor",),
2418
+ 5: ("scale_factor",)
2419
+ },
2420
+ "nearest-exact": {
2421
+ 3: (),
2422
+ 4: ()
2423
+ },
2424
+ "trilinear": {
2425
+ 5: (
2426
+ "align_corners",
2427
+ "scale_factor",
2428
+ )
2429
+ },
2195
2430
  }
2196
2431
  resize_func = {
2197
2432
  "nearest": run_nearest,
@@ -2202,76 +2437,87 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
2202
2437
  "area": run_area,
2203
2438
  "nearest-exact": run_nearest_exact,
2204
2439
  }
2440
+
2205
2441
  if not isinstance(input, Tensor):
2206
- raise TypeError(f"For 'interpolate', 'input' must be a tensor, but got {type(input)}")
2442
+ raise TypeError(
2443
+ f"For 'interpolate', 'input' must be a tensor, but got {type(input)}"
2444
+ )
2445
+
2446
+ if isinstance(size, list):
2447
+ size = tuple(size)
2448
+ if isinstance(scale_factor, list):
2449
+ scale_factor = tuple(scale_factor)
2450
+
2451
+ rank = F.rank(input)
2452
+ shape = F.shape(input)
2453
+ dim_unknown = _is_dim_unknown(shape)
2454
+
2455
+ # check for size and scale_factor
2207
2456
  if size is not None and scale_factor is not None:
2208
2457
  raise ValueError(
2209
2458
  "For 'interpolate', 'size' and 'scale_factor' cannot be set simultaneously"
2210
2459
  )
2211
2460
  if size is not None:
2212
2461
  if isinstance(size, (list, tuple)):
2213
- if len(size) != input.ndim - 2:
2214
- raise ValueError(
2215
- f"For 'interpolate', 'input' and 'size' must have the same spatial dimensions, "
2216
- f"but got 'input' is {input.ndim - 2}D, 'size' is {len(size)}D"
2217
- )
2218
2462
  check_positive_int_sequence_const(size, "size", "interpolate")
2463
+ if dim_unknown is False:
2464
+ _interpolate_size_check_with_rank(size, rank)
2219
2465
  else:
2220
2466
  check_positive_int_const(size, "size", "interpolate")
2221
- size = [size for _ in range(input.ndim - 2)]
2467
+ if dim_unknown is False:
2468
+ size = tuple([size for _ in range(rank - 2)])
2469
+ else:
2470
+ size = _interploate_make_tuple(rank - 2, size)
2222
2471
  elif scale_factor is not None:
2223
2472
  if isinstance(scale_factor, (list, tuple)):
2224
- if len(scale_factor) != input.ndim - 2:
2225
- raise ValueError(
2226
- f"For 'interpolate', 'input' and 'scale_factor' must have the same spatial dimensions, "
2227
- f"but got 'input' is {input.ndim - 2}D, 'scale_factor' is {len(scale_factor)}D"
2228
- )
2229
- check_positive_float_sequence_const(scale_factor, "scale_factor", "interpolate")
2473
+ check_positive_float_sequence_const(scale_factor, "scale_factor",
2474
+ "interpolate")
2475
+ if dim_unknown is False:
2476
+ _interpolate_scale_factor_check_with_rank(scale_factor, rank)
2230
2477
  else:
2231
- check_positive_float_const(scale_factor, "scale_factor", "interpolate")
2232
- scale_factor = [scale_factor for _ in range(input.ndim - 2)]
2478
+ check_positive_float_const(scale_factor, "scale_factor",
2479
+ "interpolate")
2480
+ if dim_unknown is False:
2481
+ scale_factor = tuple([scale_factor for _ in range(rank - 2)])
2482
+ else:
2483
+ scale_factor = _interploate_make_tuple(rank - 2, scale_factor)
2233
2484
  else:
2234
2485
  raise ValueError(
2235
2486
  "For 'interpolate', 'size' and 'scale_factor' cannot be both empty"
2236
2487
  )
2237
2488
 
2238
- if isinstance(mode, list) or mode not in supported_dict:
2239
- raise ValueError(
2240
- f"For 'interpolate', 'mode' must be in '{list(supported_dict)}', but got {mode}"
2241
- )
2242
- if input.ndim not in supported_dict.get(mode):
2243
- raise ValueError(
2244
- f"For 'interpolate', {mode} only support '{list(supported_dict.get(mode, {}))}'D, but got {input.ndim}D"
2245
- )
2489
+ # rank check
2490
+ _interpolate_mode_check(mode, supported_dict)
2491
+ if dim_unknown is False:
2492
+ _interpolate_rank_check(rank, mode, supported_dict)
2493
+
2246
2494
  # "area" mode always requires an explicit size rather than scale factor.
2247
2495
  if mode == "area" and size is None:
2248
2496
  recompute_scale_factor = True
2497
+
2498
+ # recompute_scale_factor
2249
2499
  if recompute_scale_factor is not None and recompute_scale_factor:
2250
- check_bool_const(recompute_scale_factor, "recompute_scale_factor", "interpolate")
2500
+ check_bool_const(recompute_scale_factor, "recompute_scale_factor",
2501
+ "interpolate")
2251
2502
  if size is not None:
2252
2503
  raise ValueError(
2253
2504
  "For 'interpolate', it is incorrect to set 'recompute_scale_factor' to True"
2254
- " after specifying an explicit 'size'."
2255
- )
2256
- size = _scale_factor_convert_size(input.shape, scale_factor, input.ndim - 2)
2505
+ " after specifying an explicit 'size'.")
2506
+ size = _interpolate_scale_factor_convert_size(shape, scale_factor)
2257
2507
  scale_factor = None
2258
2508
  else:
2259
- if scale_factor is not None and "scale_factor" not in supported_dict.get(mode, {}).get(input.ndim):
2260
- raise ValueError(
2261
- f"For 'interpolate', 'scale_factor' option cannot currently be set with the "
2262
- f"mode = {mode} and dim = {input.ndim}D."
2263
- )
2509
+ if dim_unknown is False:
2510
+ _interpolate_scale_factor_check(scale_factor, mode, rank,
2511
+ supported_dict)
2512
+
2513
+ # align_corners
2264
2514
  if align_corners is not None:
2265
2515
  check_bool_const(align_corners, "align_corners", "interpolate")
2266
- if "align_corners" not in supported_dict.get(mode, {}).get(input.ndim):
2267
- raise ValueError(
2268
- f"For 'interpolate', 'align_corners' option cannot currently be set with the "
2269
- f"mode = {mode}, and dim = {input.ndim}D"
2270
- )
2516
+ if dim_unknown is False:
2517
+ _interpolate_align_corners_mode_check(rank, mode, supported_dict)
2271
2518
  else:
2272
2519
  align_corners = False
2273
- if isinstance(size, list):
2274
- size = tuple(size)
2520
+
2275
2521
  return resize_func.get(mode)(input, size, align_corners, scale_factor)
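The rewritten `interpolate` above converts a `scale_factor` into an output `size` (via `_interpolate_scale_factor_convert_size`) when `recompute_scale_factor=True`, and otherwise validates the options against `supported_dict`. A minimal doctest-style sketch of the two call patterns; the input shape and values here are illustrative only:
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.arange(12, dtype=np.float32).reshape(1, 1, 3, 4))
>>> # explicit size: one entry per spatial dimension (after N, C)
>>> out1 = ops.interpolate(x, size=(6, 8), mode="bilinear")
>>> # scale_factor is recomputed into a size before the nearest resize runs
>>> out2 = ops.interpolate(x, scale_factor=2.0, recompute_scale_factor=True)
>>> print(out1.shape, out2.shape)
(1, 1, 6, 8) (1, 1, 6, 8)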
2276
2522
 
2277
2523
 
@@ -2309,6 +2555,9 @@ def softsign(x):
2309
2555
  ``Ascend`` ``GPU`` ``CPU``
2310
2556
 
2311
2557
  Examples:
2558
+ >>> import mindspore
2559
+ >>> import numpy as np
2560
+ >>> from mindspore import Tensor, ops
2312
2561
  >>> x = Tensor(np.array([0, -1, 2, 30, -30]), mindspore.float32)
2313
2562
  >>> output = ops.softsign(x)
2314
2563
  >>> print(output)
@@ -2317,6 +2566,60 @@ def softsign(x):
2317
2566
  return softsign_(x)
2318
2567
 
2319
2568
 
2569
+ def soft_margin_loss(input, target, reduction='mean'):
2570
+ r"""
2571
+ Calculate the soft margin loss of input and target.
2572
+
2573
+ Creates a criterion that optimizes a two-class classification
2574
+ logistic loss between input tensor :math:`x` and target tensor :math:`y`
2575
+ (containing 1 or -1).
2576
+
2577
+ .. math::
2578
+ \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
2579
+
2580
+ where :math:`x.nelement()` is the number of elements of :math:`x`.
2581
+
2582
+ .. warning::
2583
+ This is an experimental API that is subject to change or deletion.
2584
+
2585
+ Args:
2586
+ input (Tensor): Predict data. Data type must be float16 or float32.
2587
+ target (Tensor): Ground truth data, with the same type and shape as `input`.
2588
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2589
+ ``'sum'`` . Default: ``'mean'`` .
2590
+
2591
+ - ``'none'``: no reduction will be applied.
2592
+ - ``'mean'``: compute and return the mean of elements in the output.
2593
+ - ``'sum'``: the output elements will be summed.
2594
+
2595
+ Returns:
2596
+ Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `input`.
2597
+ Otherwise, a scalar value will be returned.
2598
+
2599
+ Raises:
2600
+ TypeError: If `input` or `target` is not a Tensor.
2601
+ TypeError: If dtype of `input` or `target` is neither float16 nor float32.
2602
+ ValueError: If shape of `input` is not the same as that of `target`.
2603
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
2604
+
2605
+ Supported Platforms:
2606
+ ``Ascend`` ``GPU``
2607
+
2608
+ Examples:
2609
+ >>> import mindspore
2610
+ >>> import numpy as np
2611
+ >>> from mindspore import Tensor, ops
2612
+ >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
2613
+ >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
2614
+ >>> output = ops.soft_margin_loss(logits, labels)
2615
+ >>> print(output)
2616
+ 0.6764238
2617
+ """
2618
+ soft_margin_loss_op = _get_cache_prim(P.SoftMarginLoss)(reduction=reduction)
2619
+ output = soft_margin_loss_op(input, target)
2620
+ return output
2621
+
2622
+
2320
2623
  def softmax(x, axis=-1, *, dtype=None):
2321
2624
  r"""
2322
2625
  Applies the Softmax operation to the input tensor on the specified axis.
@@ -2324,18 +2627,18 @@ def softmax(x, axis=-1, *, dtype=None):
2324
2627
  the Softmax function is shown as follows:
2325
2628
 
2326
2629
  .. math::
2327
- \text{output}(x_i) = \frac{exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)},
2630
+ \text{output}(x_i) = \frac{\exp(x_i)}{\sum_{j = 0}^{N-1}\exp(x_j)},
2328
2631
 
2329
2632
  where :math:`N` is the length of the tensor.
2330
2633
 
2331
2634
  Args:
2332
- axis (Union[int, tuple[int]], optional): The axis to perform the Softmax operation. Default: -1.
2635
+ axis (Union[int, tuple[int]], optional): The axis to perform the Softmax operation. Default: ``-1`` .
2333
2636
  x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2334
2637
  additional dimensions, with float16 or float32 data type.
2335
2638
 
2336
2639
  Keyword Args:
2337
2640
  dtype (:class:`mindspore.dtype`, optional): When set, `x` will be converted to the specified type,
2338
- `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: None.
2641
+ `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
2339
2642
 
2340
2643
  Returns:
2341
2644
  Tensor, with the same type and shape as the logits.
@@ -2350,6 +2653,9 @@ def softmax(x, axis=-1, *, dtype=None):
2350
2653
  ``Ascend`` ``GPU`` ``CPU``
2351
2654
 
2352
2655
  Examples:
2656
+ >>> import mindspore
2657
+ >>> import numpy as np
2658
+ >>> from mindspore import Tensor, ops
2353
2659
  >>> x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
2354
2660
  >>> output = ops.softmax(x)
2355
2661
  >>> print(output)
@@ -2372,18 +2678,18 @@ def softmin(x, axis=-1, *, dtype=None):
2372
2678
  the Softmin function is shown as follows:
2373
2679
 
2374
2680
  .. math::
2375
- \text{output}(x_i) = \frac{exp(-x_i)}{\sum_{j = 0}^{N-1}\exp(-x_j)},
2681
+ \text{output}(x_i) = \frac{\exp(-x_i)}{\sum_{j = 0}^{N-1}\exp(-x_j)},
2376
2682
 
2377
2683
  where :math:`N` is the length of the tensor.
2378
2684
 
2379
2685
  Args:
2380
- axis (Union[int, tuple[int]], optional): The axis to perform the Softmin operation. Default: -1.
2686
+ axis (Union[int, tuple[int]], optional): The axis to perform the Softmin operation. Default: ``-1`` .
2381
2687
  x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2382
2688
  additional dimensions, with float16 or float32 data type.
2383
2689
 
2384
2690
  Keyword Args:
2385
2691
  dtype (:class:`mindspore.dtype`, optional): When set, `x` will be converted to the specified type,
2386
- `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: None.
2692
+ `dtype`, before execution, and dtype of returned Tensor will also be `dtype`. Default: ``None`` .
2387
2693
 
2388
2694
  Returns:
2389
2695
  Tensor, with the same type and shape as the logits.
@@ -2398,6 +2704,9 @@ def softmin(x, axis=-1, *, dtype=None):
2398
2704
  ``Ascend`` ``GPU`` ``CPU``
2399
2705
 
2400
2706
  Examples:
2707
+ >>> import mindspore
2708
+ >>> import numpy as np
2709
+ >>> from mindspore import Tensor, ops
2401
2710
  >>> x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
2402
2711
  >>> output = ops.softmin(x)
2403
2712
  >>> print(output)
@@ -2407,7 +2716,7 @@ def softmin(x, axis=-1, *, dtype=None):
2407
2716
  if dtype is not None:
2408
2717
  x = ops.cast(x, dtype)
2409
2718
  softmax_ = _get_cache_prim(P.Softmax)(axis=axis)
2410
- return softmax_(-x)
2719
+ return softmax_(-1*x)
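The return statement above relies on the identity softmin(x) = softmax(-x); a short doctest-style check of that relation (sample values are illustrative only):
>>> import mindspore
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
>>> print(np.allclose(ops.softmin(x).asnumpy(), ops.softmax(-x).asnumpy()))
True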
2411
2720
 
2412
2721
 
2413
2722
  def softshrink(x, lambd=0.5):
@@ -2424,7 +2733,7 @@ def softshrink(x, lambd=0.5):
2424
2733
 
2425
2734
  Args:
2426
2735
  x (Tensor): The input of soft shrink with data type of float16 or float32.
2427
- lambd (float): The :math:`\lambda` must be no less than zero. Default: 0.5.
2736
+ lambd (float): The :math:`\lambda` must be no less than zero. Default: ``0.5`` .
2428
2737
 
2429
2738
  Returns:
2430
2739
  Tensor, has the same shape and data type as `x`.
@@ -2461,6 +2770,55 @@ def soft_shrink(input, lambd=0.5):
2461
2770
  return soft_shrink_op(input)
2462
2771
 
2463
2772
 
2773
+ def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
2774
+ r"""
2775
+ Applies softplus function to `input` element-wise.
2776
+
2777
+ The softplus function is shown as follows, where x is an element of `input`:
2778
+
2779
+ .. math::
2780
+
2781
+ \text{output} = \frac{1}{\beta}\log(1 + \exp(\beta * x))
2782
+
2783
+ When :math:`input * beta > threshold`, the implementation converts to the linear function
2784
+ to ensure numerical stability.
2785
+
2786
+ Args:
2787
+ input (Tensor): Tensor of any dimension.
2788
+ Supported dtypes:
2789
+
2790
+ - GPU/CPU: float16, float32, float64.
2791
+ - Ascend: float16, float32.
2792
+
2793
+ beta (int, optional): The :math:`\beta` value in softplus function. Default: ``1`` .
2794
+ threshold (int, optional): When :math:`input * beta > threshold`, softplus is converted to a linear function.
2795
+ Default: ``20`` .
2796
+
2797
+ Returns:
2798
+ Tensor, with the same type and shape as the `input` .
2799
+
2800
+ Raises:
2801
+ TypeError: If `input` is not a Tensor.
2802
+ TypeError: If the dtype of `input` is not float16, float32 or float64.
2803
+
2804
+ Supported Platforms:
2805
+ ``Ascend`` ``GPU`` ``CPU``
2806
+
2807
+ Examples:
2808
+ >>> import mindspore
2809
+ >>> import numpy as np
2810
+ >>> from mindspore import Tensor, ops
2811
+ >>> input = Tensor(np.array([0.1, 0.2, 30, 25]), mindspore.float32)
2812
+ >>> output = ops.softplus(input)
2813
+ >>> print(output)
2814
+ [0.7443967 0.79813886 30. 25.]
2815
+ """
2816
+ softplus_op = _get_cache_prim(P.Softplus)()
2817
+ scaling_input = beta * input
2818
+ op_output = (1 / beta) * softplus_op(scaling_input)
2819
+ return ops.select(input * beta > threshold, input, op_output)
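A quick numerical cross-check of the `softplus` formula above against a NumPy reference; the sample values and the use of `beta=2` are illustrative only:
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = np.array([-1.0, 0.0, 1.0], dtype=np.float32)
>>> out = ops.softplus(Tensor(x), beta=2)
>>> ref = (1 / 2) * np.log1p(np.exp(2 * x))
>>> print(np.allclose(out.asnumpy(), ref, atol=1e-5))
True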
2820
+
2821
+
2464
2822
  def silu(x):
2465
2823
  r"""
2466
2824
  Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
@@ -2526,6 +2884,9 @@ def selu(input_x):
2526
2884
  ``Ascend`` ``GPU`` ``CPU``
2527
2885
 
2528
2886
  Examples:
2887
+ >>> import mindspore
2888
+ >>> import numpy as np
2889
+ >>> from mindspore import Tensor, ops
2529
2890
  >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
2530
2891
  >>> output = ops.selu(input_x)
2531
2892
  >>> print(output)
@@ -2559,12 +2920,15 @@ def sigmoid(input):
2559
2920
  ``Ascend`` ``GPU`` ``CPU``
2560
2921
 
2561
2922
  Examples:
2923
+ >>> import mindspore
2924
+ >>> import numpy as np
2925
+ >>> from mindspore import Tensor, ops
2562
2926
  >>> input = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
2563
2927
  >>> output = ops.sigmoid(input)
2564
2928
  >>> print(output)
2565
2929
  [0.7310586 0.880797 0.95257413 0.98201376 0.9933072 ]
2566
2930
  """
2567
- return sigmoid_(input)
2931
+ return _get_cache_prim(NN_OPS.Sigmoid)()(input)
2568
2932
 
2569
2933
 
2570
2934
  def logsigmoid(x):
@@ -2574,7 +2938,7 @@ def logsigmoid(x):
2574
2938
  Logsigmoid is defined as:
2575
2939
 
2576
2940
  .. math::
2577
- \text{logsigmoid}(x_{i}) = log(\frac{1}{1 + \exp(-x_i)}),
2941
+ \text{logsigmoid}(x_{i}) = \log(\frac{1}{1 + \exp(-x_i)}),
2578
2942
 
2579
2943
  where :math:`x_{i}` is the element of the input.
2580
2944
 
@@ -2589,22 +2953,178 @@ def logsigmoid(x):
2589
2953
  TypeError: If dtype of `x` is neither float16 nor float32.
2590
2954
 
2591
2955
  Supported Platforms:
2592
- ``Ascend`` ``GPU``
2956
+ ``Ascend`` ``GPU`` ``CPU``
2593
2957
 
2594
2958
  Examples:
2959
+ >>> import mindspore
2960
+ >>> import numpy as np
2961
+ >>> from mindspore import Tensor, ops
2595
2962
  >>> x = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32)
2596
2963
  >>> output = ops.logsigmoid(x)
2597
2964
  >>> print(output)
2598
2965
  [-0.31326166 -0.12692806 -0.04858734]
2599
2966
  """
2600
- output = _get_cache_prim(P.Mul)()(x, -1)
2601
- output = _get_cache_prim(P.Exp)()(output)
2602
- output = _get_cache_prim(P.Add)()(output, 1)
2603
- output = _get_cache_prim(P.Reciprocal)()(output)
2967
+ output = _get_cache_prim(P.Sigmoid)()(x)
2604
2968
  ret = _get_cache_prim(P.Log)()(output)
2605
2969
  return ret
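The refactor above computes logsigmoid as `log(sigmoid(x))` in two primitive calls instead of four; a one-line NumPy sketch of the same identity (sample values are illustrative only):
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = np.array([1.0, 2.0, 3.0], dtype=np.float32)
>>> out = ops.logsigmoid(Tensor(x))
>>> print(np.allclose(out.asnumpy(), np.log(1.0 / (1.0 + np.exp(-x))), atol=1e-6))
True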
2606
2970
 
2607
2971
 
2972
+ def dense(input, weight, bias=None):
2973
+ r"""
2974
+ Applies the dense connected operation to the `input`. The dense function is defined as:
2975
+
2976
+ .. math::
2977
+ output = input * weight^{T} + bias
2978
+
2979
+ .. warning::
2980
+ This is an experimental API that is subject to change or deletion.
2981
+
2982
+ Args:
2983
+ input (Tensor): Input Tensor of shape :math:`(*, in\_channels)`,
2984
+ where :math:`*` means any number of additional dimensions.
2985
+ weight (Tensor): The weight applied to the input.
2986
+ The shape is :math:`(out\_channels, in\_channels)` or :math:`(in\_channels)`.
2987
+ bias (Tensor, optional): Additive biases to the output.
2988
+ The shape is :math:`(out\_channels)` or :math:`()`. Default: ``None`` , the `bias` is 0.
2989
+
2990
+ Returns:
2991
+ Tensor, whose shape is determined by the shapes of `input` and `weight`.
2992
+
2993
+ Raises:
2994
+ TypeError: If `input` is not Tensor.
2995
+ TypeError: If `weight` is not Tensor.
2996
+ TypeError: If `bias` is not Tensor.
2997
+
2998
+ Supported Platforms:
2999
+ ``Ascend`` ``GPU`` ``CPU``
3000
+
3001
+ Examples:
3002
+ >>> import mindspore
+ >>> import numpy as np
3003
+ >>> from mindspore import Tensor, ops
3004
+ >>> input = mindspore.Tensor([[-1., 1., 2.], [-3., -3., 1.]], mindspore.float32)
3005
+ >>> weight = mindspore.Tensor([[-2., -2., -2.], [0., -1., 0.]], mindspore.float32)
3006
+ >>> bias = mindspore.Tensor([0., 1.], mindspore.float32)
3007
+ >>> output = mindspore.ops.dense(input, weight, bias)
3008
+ >>> print(output)
3009
+ [[-4. 0.]
3010
+ [10. 4.]]
3011
+ """
3012
+ _check_is_tensor("input", input, "dense")
3013
+ _check_is_tensor("weight", weight, "dense")
3014
+ _check_is_tensor("bias", bias, "dense")
3015
+ weight = ops.t(weight)
3016
+ input = ops.matmul(input, weight)
3017
+ input_shape = input.shape
3018
+ if bias is not None:
3019
+ input = input + bias
3020
+ _check_dense_add_bias_shape(input_shape, input.shape, bias.shape)
3021
+ return input
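Since `dense` implements `input * weight^T + bias`, the result can be cross-checked against plain NumPy; the values below reuse the docstring example and are illustrative only:
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> inp = np.array([[-1., 1., 2.], [-3., -3., 1.]], dtype=np.float32)
>>> w = np.array([[-2., -2., -2.], [0., -1., 0.]], dtype=np.float32)
>>> b = np.array([0., 1.], dtype=np.float32)
>>> out = ops.dense(Tensor(inp), Tensor(w), Tensor(b))
>>> print(np.allclose(out.asnumpy(), inp @ w.T + b))
True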
3022
+
3023
+
3024
+ def _check_dense_add_bias_shape(input_shape, output_shape, bias_shape):
3025
+ """Check that the output has the correct shape after adding bias."""
3026
+ if input_shape != output_shape:
3027
+ raise ValueError(f"For dense, the bias shape {bias_shape} does not match the input shape {input_shape}.")
3028
+
3029
+
3030
+ @_primexpr
3031
+ def check_dense_inputs_same_shape(input1_shape, input2_shape, prim_name=None):
3032
+ """check bidense input Tensors' shape"""
3033
+ msg_prefix = f"For '{prim_name}', the" if prim_name else "The"
3034
+ if input1_shape[:-1] != input2_shape[:-1]:
3035
+ raise ValueError(f"{msg_prefix} dimensions except the last of 'input1' must be same as 'input2', but got "
3036
+ f"{input1_shape} of 'input1' and {input2_shape} of 'input2'")
3037
+
3038
+
3039
+ def bidense(input1, input2, weight, bias=None):
3040
+ r"""
3041
+ Applies a bilinear dense connected layer to `input1` and `input2`. The bilinear dense function is defined as:
3042
+
3043
+ .. math::
3044
+ output = x_{1}^{T}Ax_{2} + b
3045
+
3046
+ :math:`x_{1}` represents `input1` , :math:`x_{2}` represents `input2` , :math:`A` represents `weight` ,
3047
+ :math:`b` represents `bias` .
3048
+
3049
+ .. warning::
3050
+ This is an experimental API that is subject to change or deletion.
3051
+
3052
+ Args:
3053
+ input1 (Tensor): Input Tensor of shape :math:`(*, in1\_channels)`,
3054
+ where :math:`*` means any number of additional dimensions. All but the last dimension
3055
+ should be the same as `input2`.
3056
+ input2 (Tensor): Input Tensor of shape :math:`(*, in2\_channels)`,
3057
+ where :math:`*` means any number of additional dimensions. All but the last dimension
3058
+ should be the same as `input1`.
3059
+ weight (Tensor): The weight applied to the input1 and input2.
3060
+ The shape is :math:`(out\_channels, in1\_channels, in2\_channels)`.
3061
+ bias (Tensor, optional): Additive biases to the output.
3062
+ The shape is :math:`(out\_channels)` or :math:`()`. Default: ``None`` , the `bias` is 0.
3063
+
3064
+ Returns:
3065
+ Tensor, shape :math:`(*, out\_channels)`, where :math:`*` means any number of additional dimensions.
3066
+ All but the last dimension should be the same as the input Tensors.
3067
+
3068
+ Raises:
3069
+ TypeError: If `input1` is not Tensor.
3070
+ TypeError: If `input2` is not Tensor.
3071
+ TypeError: If `weight` is not Tensor.
3072
+ TypeError: If `bias` is not Tensor.
3073
+ ValueError: If dimensions except the last of 'input1' are different from 'input2' .
3074
+
3075
+
3076
+ Supported Platforms:
3077
+ ``Ascend`` ``GPU`` ``CPU``
3078
+
3079
+ Examples:
3080
+ >>> import mindspore
3081
+ >>> from mindspore import Tensor, ops
3082
+ >>> input1 = mindspore.Tensor([[-1.1283, 1.2603],
3083
+ ... [0.0214, 0.7801],
3084
+ ... [-1.2086, 1.2849]], mindspore.float32)
3085
+ >>> input2 = mindspore.Tensor([[-0.4631, 0.3238, 0.4201],
3086
+ ... [0.6215, -1.0910, -0.5757],
3087
+ ... [-0.7788, -0.0706, -0.7942]], mindspore.float32)
3088
+ >>> weight = mindspore.Tensor([[[-0.3132, 0.9271, 1.1010],
3089
+ ... [0.6555, -1.2162, -0.2987]],
3090
+ ... [[1.0458, 0.5886, 0.2523],
3091
+ ... [-1.3486, -0.8103, -0.2080]],
3092
+ ... [[1.1685, 0.5569, -0.3987],
3093
+ ... [-0.4265, -2.6295, 0.8535]],
3094
+ ... [[0.6948, -1.1288, -0.6978],
3095
+ ... [0.3511, 0.0609, -0.1122]]], mindspore.float32)
3096
+ >>> output = ops.bidense(input1, input2, weight)
3097
+ >>> print(output)
3098
+ [[-2.0612743 0.5581219 0.22383511 0.8667302]
3099
+ [1.4476739 0.12626505 1.6552988 0.21297503]
3100
+ [0.6003161 2.912046 0.5590313 -0.35449564]]
3101
+ """
3102
+ _check_is_tensor("input1", input1, "bidense")
3103
+ _check_is_tensor("input2", input2, "bidense")
3104
+ _check_is_tensor("weight", weight, "bidense")
3105
+ _check_is_tensor("bias", bias, "bidense")
3106
+ input1_shape = input1.shape
3107
+ input2_shape = input2.shape
3108
+ check_dense_inputs_same_shape(input1_shape, input2_shape, "bidense")
3109
+
3110
+ if len(input1_shape) != 2:
3111
+ input1 = input1.reshape((-1, input1_shape[-1]))
3112
+ input2 = input2.reshape((-1, input2_shape[-1]))
3113
+ batch_size = input1.shape[0]
3114
+ matmul_ = P.MatMul()
3115
+ output = matmul_(input1, weight.transpose(1, 2, 0).view(input1_shape[-1], -1))
3116
+ output = output.view(batch_size, input2_shape[-1], weight.shape[0])
3117
+ output = output.transpose(2, 0, 1) * input2
3118
+ output = output.sum(2).swapaxes(0, 1)
3119
+ if bias is not None:
3120
+ bias_add_ = P.BiasAdd()
3121
+ output = bias_add_(output, bias)
3122
+ if len(input1_shape) != 2:
3123
+ output_shape = input1_shape[:-1] + (-1,)
3124
+ output = output.reshape(output_shape)
3125
+ return output
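Because `bidense` computes :math:`x_{1}^{T}Ax_{2} + b` per output channel, its output can be checked against a NumPy einsum; the shapes and random values in this sketch are illustrative only:
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x1 = np.random.randn(3, 2).astype(np.float32)
>>> x2 = np.random.randn(3, 4).astype(np.float32)
>>> w = np.random.randn(5, 2, 4).astype(np.float32)
>>> out = ops.bidense(Tensor(x1), Tensor(x2), Tensor(w))
>>> ref = np.einsum('bi,oij,bj->bo', x1, w, x2)
>>> print(np.allclose(out.asnumpy(), ref, atol=1e-4))
True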
3126
+
3127
+
2608
3128
  def deformable_conv2d(x, weight, offsets, kernel_size, strides, padding, bias=None, dilations=(1, 1, 1, 1), groups=1,
2609
3129
  deformable_groups=1, modulated=True):
2610
3130
  r"""
@@ -2640,16 +3160,16 @@ def deformable_conv2d(x, weight, offsets, kernel_size, strides, padding, bias=No
2640
3160
  padding (tuple[int]): A tuple of 4 integers. The number of pixels to add to each (top, bottom, left,
2641
3161
  right) side of the input.
2642
3162
  bias (Tensor, optional): An 1D tensor of additive biases to the filter outputs.
2643
- The shape is :math:`(C_{out})`. Defaults to None.
3163
+ The shape is :math:`(C_{out})`. Default: ``None`` .
2644
3164
  dilations (tuple[int], optional): A tuple of 4 integers. The dilation factor for each dimension of input. The
2645
3165
  dimension order is interpreted according to the data format of `x`. The N and C dimensions must be set
2646
- to 1. Defaults to (1, 1, 1, 1).
3166
+ to 1. Default: ``(1, 1, 1, 1)`` .
2647
3167
  groups (int, optional): An integer of type int32. The number of blocked connections from input channels
2648
- to output channels. In_channels and out_channels must both be divisible by `groups`. Defaults to 1.
3168
+ to output channels. In_channels and out_channels must both be divisible by `groups`. Default: ``1`` .
2649
3169
  deformable_groups (int, optional): An integer of type int32. The number of deformable group partitions.
2650
- In_channels must be divisible by `deformable_groups`. Defaults to 1.
3170
+ In_channels must be divisible by `deformable_groups`. Default: ``1`` .
2651
3171
  modulated (bool, optional): Specifies version of DeformableConv2D, True means v2, False means v1, currently
2652
- only supports v2. Defaults to True.
3172
+ only supports v2. Default: ``True`` .
2653
3173
 
2654
3174
  Returns:
2655
3175
  Tensor, A 4D Tensor of output feature map. With the same type as `x`. With the format "NCHW",
@@ -2677,6 +3197,9 @@ def deformable_conv2d(x, weight, offsets, kernel_size, strides, padding, bias=No
2677
3197
  ``Ascend`` ``GPU`` ``CPU``
2678
3198
 
2679
3199
  Examples:
3200
+ >>> import numpy as np
3201
+ >>> from mindspore import Tensor, ops
3202
+ >>> from mindspore import dtype as mstype
2680
3203
  >>> x = Tensor(np.ones((4, 3, 10, 10)), mstype.float32)
2681
3204
  >>> kh, kw = 3, 3
2682
3205
  >>> weight = Tensor(np.ones((5, 3, kh, kw)), mstype.float32)
@@ -2718,7 +3241,7 @@ def pdist(input, p=2.0):
2718
3241
  Args:
2719
3242
  input (Tensor): Input tensor of shape :math:`(*B, N, M)`. :math:`*B` is batch size, one-dim or multi-dim.
2720
3243
  dtype: float16, float32 or float64.
2721
- p (float): The order of norm distance, :math:`p∈[0, ∞)`. Default: 2.0.
3244
+ p (float): The order of norm distance, :math:`p∈[0, ∞)`. Default: ``2.0`` .
2722
3245
 
2723
3246
  Returns:
2724
3247
  Tensor, has the same dtype as `input`.
@@ -2734,6 +3257,8 @@ def pdist(input, p=2.0):
2734
3257
  ``GPU`` ``CPU``
2735
3258
 
2736
3259
  Examples:
3260
+ >>> import numpy as np
3261
+ >>> from mindspore import Tensor, ops
2737
3262
  >>> x = Tensor(np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]).astype(np.float32))
2738
3263
  >>> y = ops.pdist(x, p=2.0)
2739
3264
  >>> print(y)
@@ -2743,7 +3268,7 @@ def pdist(input, p=2.0):
2743
3268
  return pdist_(input)
2744
3269
 
2745
3270
 
2746
- @constexpr
3271
+ @_primexpr
2747
3272
  def _check_pad_inputs(padding):
2748
3273
  """check the input of pad"""
2749
3274
  if len(padding) % 2 != 0:
@@ -2779,7 +3304,8 @@ def pad(input_x, padding, mode='constant', value=None):
2779
3304
  :math:`\text{padding_top}, \text{padding_bottom}`,
2780
3305
  :math:`\text{padding_front}, \text{padding_back})` and so on.
2781
3306
 
2782
- mode (str, optional): Pad filling mode, "constant", "reflect" or "replicate". Default: "constant".
3307
+ mode (str, optional): Pad filling mode, ``"constant"`` , ``"reflect"`` , ``"replicate"`` or ``"circular"`` .
3308
+ Default: ``'constant'`` .
2783
3309
 
2784
3310
  For "constant" mode, please refer to :class:`mindspore.nn.ConstantPad1d` as an example to understand
2785
3311
  this filling pattern and extend the padding pattern to n dimensions.
@@ -2794,8 +3320,15 @@ def pad(input_x, padding, mode='constant', value=None):
2794
3320
  The replicate mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
2795
3321
  or 4D input, or the last dimension of 2D or 3D input.
2796
3322
 
3323
+ For "circular" mode, the pixels from one edge of the image are wrapped around to the opposite edge,
3324
+ such that the pixel on the right edge of the image is replaced with the pixel on the left edge,
3325
+ and the pixel on the bottom edge is replaced with the pixel on the top edge.
3326
+ The circular mode is used to pad the last three dimensions of 4D or 5D input, the last two dimensions of 3D
3327
+ or 4D input, or the last dimension of 2D or 3D input.
3328
+
2797
3329
  value (Union[int, float, None], optional): Valid only in "constant" mode.
2798
3330
  Set the padding value in "constant" mode. If the value is None, 0 is used as the default padding value.
3331
+ Default: ``None`` .
2799
3332
 
2800
3333
  Returns:
2801
3334
  Tensor, the tensor after padding.
@@ -2808,7 +3341,7 @@ def pad(input_x, padding, mode='constant', value=None):
2808
3341
  ValueError: If mode is not "constant" and value not None.
2809
3342
 
2810
3343
  Supported Platforms:
2811
- ``GPU`` ``CPU``
3344
+ ``Ascend`` ``GPU`` ``CPU``
2812
3345
 
2813
3346
  Examples:
2814
3347
  >>> import mindspore as ms
@@ -2843,6 +3376,18 @@ def pad(input_x, padding, mode='constant', value=None):
2843
3376
  [4. 4. 5. 5.]
2844
3377
  [6. 6. 7. 7.]
2845
3378
  [6. 6. 7. 7.]]]]
3379
+ >>> output3 = ops.pad(x, (1, 1, 2, 1), mode='circular')
3380
+ >>> print(output3)
3381
+ [[[[1. 0. 1. 0.]
3382
+ [3. 2. 3. 2.]
3383
+ [1. 0. 1. 0.]
3384
+ [3. 2. 3. 2.]
3385
+ [1. 0. 1. 0.]]
3386
+ [[5. 4. 5. 4.]
3387
+ [7. 6. 7. 6.]
3388
+ [5. 4. 5. 4.]
3389
+ [7. 6. 7. 6.]
3390
+ [5. 4. 5. 4.]]]]
2846
3391
  """
2847
3392
  if not isinstance(input_x, Tensor):
2848
3393
  raise TypeError(f"For 'pad', the type of 'input_x' must be Tensor, but got {type(input_x)}.")
@@ -2881,19 +3426,17 @@ def relu(input):
2881
3426
 
2882
3427
  .. math::
2883
3428
 
2884
- ReLU(input) = (input)^+ = max(0, input)
3429
+ ReLU(input) = (input)^+ = \max(0, input)
2885
3430
 
2886
3431
  Note:
2887
3432
  In general, this operator is more commonly used. The difference from `ReLuV2` is that the `ReLuV2` will
2888
3433
  output one more Mask.
2889
3434
 
2890
3435
  Args:
2891
- input (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
2892
- additional dimensions, data type is
2893
- `number <https://www.mindspore.cn/docs/en/r2.0/api_python/mindspore.html#mindspore.dtype>`_.
3436
+ input (Tensor): Input Tensor of numeric types.
2894
3437
 
2895
3438
  Returns:
2896
- Tensor of shape :math:`(N, *)`, with the same dtype and shape as the `input`.
3439
+ Tensor, has the same dtype and shape as `input`.
2897
3440
 
2898
3441
  Raises:
2899
3442
  TypeError: If dtype of `input` is not a number.
@@ -2903,6 +3446,9 @@ def relu(input):
2903
3446
  ``Ascend`` ``GPU`` ``CPU``
2904
3447
 
2905
3448
  Examples:
3449
+ >>> import mindspore
3450
+ >>> import numpy as np
3451
+ >>> from mindspore import Tensor, ops
2906
3452
  >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
2907
3453
  >>> output = ops.relu(input_x)
2908
3454
  >>> print(output)
@@ -2924,7 +3470,9 @@ def relu6(x):
2924
3470
  It returns :math:`\min(\max(0,x), 6)` element-wise.
2925
3471
 
2926
3472
  Args:
2927
- x (Tensor): Tensor of shape :math:`(N, *)` with float16 or float32 data type.
3473
+ x (Tensor): Tensor of shape :math:`(N, *)`,
3474
+ where :math:`*` means any number of additional dimensions.
3475
+ Data type must be float16 or float32.
2928
3476
 
2929
3477
  Returns:
2930
3478
  Tensor, with the same dtype and shape as the `x`.
@@ -2937,6 +3485,9 @@ def relu6(x):
2937
3485
  ``Ascend`` ``GPU`` ``CPU``
2938
3486
 
2939
3487
  Examples:
3488
+ >>> import mindspore
3489
+ >>> import numpy as np
3490
+ >>> from mindspore import Tensor, ops
2940
3491
  >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
2941
3492
  >>> result = ops.relu6(input_x)
2942
3493
  >>> print(result)
@@ -2967,12 +3518,12 @@ def prelu(x, weight):
2967
3518
  The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
2968
3519
  weight (Tensor): Weight Tensor. The data type is float16 or float32.
2969
3520
  The weight can only be a Tensor, and the length is the same as the number of channels C of the `input_x`.
2970
- On GPU devices, when the input is a scalar, the shape is (1,).
3521
+ On GPU devices, when the input is a scalar, the shape is :math:`(1,)` .
2971
3522
 
2972
3523
  Returns:
2973
3524
  Tensor, with the same shape and dtype as `x`.
2974
3525
 
2975
- For detailed information, please refer to :class:`mindspore.nn.PReLU`.
3526
+ For detailed information, please refer to :class:`mindspore.nn.PReLU`.
2976
3527
 
2977
3528
  Raises:
2978
3529
  TypeError: If dtype of `x` or `weight` is neither float16 nor float32.
@@ -2984,6 +3535,9 @@ def prelu(x, weight):
2984
3535
  ``Ascend`` ``GPU`` ``CPU``
2985
3536
 
2986
3537
  Examples:
3538
+ >>> import mindspore
3539
+ >>> import numpy as np
3540
+ >>> from mindspore import Tensor, ops
2987
3541
  >>> x = Tensor(np.arange(-6, 6).reshape((2, 3, 2)), mindspore.float32)
2988
3542
  >>> weight = Tensor(np.array([0.1, 0.6, -0.3]), mindspore.float32)
2989
3543
  >>> output = ops.prelu(x, weight)
@@ -3017,8 +3571,8 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3017
3571
 
3018
3572
  Args:
3019
3573
  input (Tensor): The input of rrelu is a Tensor of any dimension.
3020
- lower (Union[int, float]): Slope of the activation function at x < 0. Default: 1.0/8.
3021
- upper (Union[int, float]): Slope of the activation function at x < 0. Default: 1.0/3.
3574
+ lower (Union[int, float]): Lower bound of the uniformly distributed slope applied where x < 0. Default: ``1.0 / 8`` .
3575
+ upper (Union[int, float]): Upper bound of the uniformly distributed slope applied where x < 0. Default: ``1.0 / 3`` .
3022
3576
 
3023
3577
  Returns:
3024
3578
  Tensor, after rrelu, has the same type and shape as the `input`.
@@ -3034,6 +3588,9 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3034
3588
  ``Ascend`` ``GPU`` ``CPU``
3035
3589
 
3036
3590
  Examples:
3591
+ >>> import mindspore
3592
+ >>> import numpy as np
3593
+ >>> from mindspore import Tensor, ops
3037
3594
  >>> x = Tensor(np.array([[-1.0, 4.0], [2.0, 0]]), mindspore.float32)
3038
3595
  >>> output = ops.rrelu(x)
3039
3596
  >>> print(output)
@@ -3041,18 +3598,25 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
3041
3598
  [ 2. 0. ]]
3042
3599
  """
3043
3600
  if not isinstance(upper, (float, int)):
3044
- raise TypeError(f"For 'ops.rrelu', `upper` must be an int or a float, but got {type(upper)}")
3601
+ raise TypeError(f"For 'rrelu', 'upper' must be an int or a float, but got {type(upper)}")
3045
3602
  if not isinstance(lower, (float, int)):
3046
- raise TypeError(f"For 'ops.rrelu', `lower` must be an int or a float, but got {type(lower)}")
3603
+ raise TypeError(f"For 'rrelu', 'lower' must be an int or a float, but got {type(lower)}")
3047
3604
  if lower > upper:
3048
- raise ValueError(f"For 'ops.rrelu', the value of `upper` must be greater than `lower`, "
3605
+ raise ValueError(f"For 'rrelu', the value of 'upper' must be greater than or equal to 'lower', "
3049
3606
  f"but got upper: {upper}, lower: {lower}. ")
3050
- size = input.shape
3607
+ if not isinstance(input, Tensor):
3608
+ raise TypeError(f"For 'rrelu', the 'input' must be a Tensor but got {type(input)}.")
3609
+ _lower = Tensor(lower, mstype.float32)
3610
+ _upper = Tensor(upper, mstype.float32)
3611
+ _size = input.shape
3612
+ if ops.is_sequence_value_unknown(_size):
3613
+ dyn_shape = _get_cache_prim(P.TensorShape)()
3614
+ _size = dyn_shape(input)
3051
3615
  sign_matrix = _get_cache_prim(P.Sign)()(input)
3052
3616
  negative_filter = sign_matrix.clip(None, 0)
3053
3617
  positive_filter = sign_matrix.clip(0, None)
3054
- dtype = _get_cache_prim(P.DType)()(input)
3055
- mask = _get_cache_prim(P.Cast)()(Tensor(np.random.uniform(lower, upper, size=size)), dtype)
3618
+ _dtype = _get_cache_prim(P.DType)()(input)
3619
+ mask = ops.uniform(_size, _lower, _upper).astype(_dtype)
3056
3620
  negative_mask = negative_filter * mask * -1
3057
3621
  total_mask = negative_mask + positive_filter
3058
3622
  out = total_mask * input
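Because the rewritten `rrelu` draws its negative-side slope with `ops.uniform`, every output for a negative input lies between `lower * x` and `upper * x`; a small sanity-check sketch (input values and bounds are illustrative only):
>>> import mindspore
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.full((4, 4), -2.0), mindspore.float32)
>>> y = ops.rrelu(x, lower=0.1, upper=0.3).asnumpy()
>>> print(bool(np.all((y >= -0.6) & (y <= -0.2))))
True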
@@ -3098,6 +3662,7 @@ def mirror_pad(input_x, paddings, mode):
3098
3662
  ``Ascend`` ``GPU`` ``CPU``
3099
3663
 
3100
3664
  Examples:
3665
+ >>> from mindspore import Tensor, ops
3101
3666
  >>> input_x = Tensor([[1,2,3], [4,5,6], [7,8,9]])
3102
3667
  >>> mode = "REFLECT"
3103
3668
  >>> paddings = Tensor([[1, 1], [2, 2]])
@@ -3134,10 +3699,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3134
3699
  l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
3135
3700
  \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
3136
3701
 
3137
- where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
3138
- N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
3702
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
3703
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
3139
3704
 
3140
- If reduction is not 'none' (default 'mean'), then
3705
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3141
3706
 
3142
3707
  .. math::
3143
3708
 
@@ -3156,10 +3721,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3156
3721
  \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
3157
3722
  l_n = - \sum_{c=1}^C w_c \log \frac{\exp(x_{n,c})}{\sum_{i=1}^C \exp(x_{n,i})} y_{n,c}
3158
3723
 
3159
- where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
3160
- N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
3724
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
3725
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
3161
3726
 
3162
- If reduction is not 'none' (default 'mean'), then
3727
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3163
3728
 
3164
3729
  .. math::
3165
3730
 
@@ -3171,20 +3736,25 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3171
3736
  \end{cases}
3172
3737
 
3173
3738
  Args:
3174
- input (Tensor): :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
3739
+ input (Tensor): :math:`(N)` or :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
3175
3740
  in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
3176
3741
  `input` is expected to be log-probabilities, data type must be float16 or float32.
3177
- target (Tensor): :math:`(N)` or :math:`(N, d_1, d_2, ..., d_K)` for
3178
- high-dimensional loss.
3742
+ target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
3743
+ :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32. For probabilities, tensor of shape :math:`(C,)` ,
3744
+ :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32.
3179
3745
  weight (Tensor): A rescaling weight applied to the loss of each batch element.
3180
- If not None, the shape is :math:`(C,)`,
3181
- data type must be float16 or float32. Default: None.
3746
+ If not None, the shape is :math:`(C,)`, data type must be float16 or float32. Default: ``None`` .
3182
3747
  ignore_index (int): Specifies a target value that is ignored
3183
- and does not contribute to the input gradient. Default: -100
3184
- reduction (str): Apply specific reduction method to the output: 'none', 'mean', or 'sum'.
3185
- Default: 'mean'.
3748
+ and does not contribute to the input gradient. Default: ``-100`` .
3749
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3750
+ ``'sum'`` . Default: ``'mean'`` .
3751
+
3752
+ - ``'none'``: no reduction will be applied.
3753
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
3754
+ - ``'sum'``: the output elements will be summed.
3755
+
3186
3756
  label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
3187
- from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: 0.0.
3757
+ from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .
3188
3758
 
3189
3759
  Returns:
3190
3760
  Tensor, the computed loss value.
@@ -3193,17 +3763,25 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
3193
3763
  ``Ascend`` ``GPU`` ``CPU``
3194
3764
 
3195
3765
  Examples:
3766
+ >>> import mindspore as ms
3767
+ >>> import numpy as np
3196
3768
  >>> # Case 1: Indices labels
3197
- >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3198
- >>> target = mindspore.Tensor(np.array([1, 0, 4]), mindspore.int32)
3199
- >>> output = ops.cross_entropy(inputs, target)
3769
+ >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
3770
+ >>> target = ms.Tensor(np.array([1, 0, 4]), ms.int32)
3771
+ >>> output = ms.ops.cross_entropy(inputs, target)
3200
3772
  >>> # Case 2: Probability labels
3201
- >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3202
- >>> target = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3203
- >>> output = ops.cross_entropy(inputs, target)
3773
+ >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
3774
+ >>> target = ms.Tensor(np.random.randn(3, 5), ms.float32)
3775
+ >>> output = ms.ops.cross_entropy(inputs, target)
3204
3776
  """
3777
+ _check_is_tensor('input', input, "cross_entropy_loss")
3778
+ _check_is_tensor('target', target, "cross_entropy_loss")
3779
+ _check_is_tensor('weight', weight, "cross_entropy_loss")
3780
+ check_int_const(ignore_index, 'ignore_index', "cross_entropy_loss")
3781
+ check_non_negative_float_const(label_smoothing, 'label_smoothing', "cross_entropy_loss")
3782
+ check_string_const(reduction, ['none', 'mean', 'sum'], 'reduction', "cross_entropy_loss")
3205
3783
  class_dim = 0 if input.ndim == 1 else 1
3206
- if input.size == target.size:
3784
+ if target.dtype in [mstype.float32, mstype.float16]:
3207
3785
  return _cross_entropy(input, target, class_dim, weight, reduction, label_smoothing)
3208
3786
  return nll_loss(_innner_log_softmax(input, class_dim), target, weight, ignore_index, reduction, label_smoothing)
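The dispatch added above selects the soft-label branch whenever `target` is a float tensor and the class-index branch otherwise; a sketch of both documented call patterns (shapes and values are illustrative only):
>>> import mindspore as ms
>>> import numpy as np
>>> logits = ms.Tensor(np.random.randn(3, 5), ms.float32)
>>> hard = ms.Tensor(np.array([1, 0, 4]), ms.int32)              # class indices
>>> soft = ms.Tensor(np.abs(np.random.randn(3, 5)), ms.float32)  # probability-style labels
>>> print(ms.ops.cross_entropy(logits, hard).shape, ms.ops.cross_entropy(logits, soft).shape)
() ()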
3209
3787
 
@@ -3246,9 +3824,10 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3246
3824
  \{c \not= \text{ignore_index}\},
3247
3825
 
3248
3826
  where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
3249
- N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
3827
+ N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
3828
+ classes.
3250
3829
 
3251
- If reduction is not 'none' (default 'mean'), then
3830
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
3252
3831
 
3253
3832
  .. math::
3254
3833
 
@@ -3265,13 +3844,18 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3265
3844
  high-dimensional loss, data type must be int32.
3266
3845
  weight (Tensor): A rescaling weight applied to the loss of each batch element.
3267
3846
  If not None, the shape is :math:`(C,)`.
3268
- The data type must be float16 or float32. Default: None.
3847
+ The data type must be float16 or float32. Default: ``None`` .
3269
3848
  ignore_index (int): Specifies a target value that is ignored
3270
- and does not contribute to the input gradient. Default: -100
3271
- reduction (str): Apply specific reduction method to the output: 'none', 'mean', or 'sum'.
3272
- Default: 'mean'.
3849
+ and does not contribute to the input gradient. Default: ``-100`` .
3850
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3851
+ ``'sum'`` . Default: ``'mean'`` .
3852
+
3853
+ - ``'none'``: no reduction will be applied.
3854
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
3855
+ - ``'sum'``: the output elements will be summed.
3856
+
3273
3857
  label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
3274
- from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: 0.0.
3858
+ from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .
3275
3859
 
3276
3860
  Returns:
3277
3861
  Tensor, the computed loss value.
@@ -3280,7 +3864,9 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
3280
3864
  ``Ascend`` ``GPU`` ``CPU``
3281
3865
 
3282
3866
  Examples:
3283
-
3867
+ >>> import mindspore
3868
+ >>> import numpy as np
3869
+ >>> from mindspore import Tensor, ops
3284
3870
  >>> inputs = mindspore.Tensor(np.random.randn(3, 5), mindspore.float32)
3285
3871
  >>> target = mindspore.Tensor(np.array([1, 0, 4]), mindspore.int32)
3286
3872
  >>> output = ops.nll_loss(inputs, target)
@@ -3361,7 +3947,7 @@ def l1_loss(input, target, reduction='mean'):
3361
3947
  r"""
3362
3948
  Calculate the mean absolute error between the `input` value and the `target` value.
3363
3949
 
3364
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to "none" ,
3950
+ Assuming that :math:`x` and :math:`y` are 1-D Tensors of length :math:`N`, and `reduction` is set to ``"none"`` ,
3365
3951
  then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
3366
3952
 
3367
3953
  The formula is as follows:
@@ -3371,7 +3957,7 @@ def l1_loss(input, target, reduction='mean'):
3371
3957
 
3372
3958
  where :math:`N` is the batch size.
3373
3959
 
3374
- If `reduction` is mean or sum, then:
3960
+ If `reduction` is ``"mean"`` or ``"sum"`` , then:
3375
3961
 
3376
3962
  .. math::
3377
3963
  \ell(x, y) =
@@ -3384,22 +3970,28 @@ def l1_loss(input, target, reduction='mean'):
3384
3970
  input (Tensor): Predicted value, Tensor of any dimension.
3385
3971
  target (Tensor): Target value, usually has the same shape as the `input`.
3386
3972
  If `input` and `target` have different shape, make sure they can broadcast to each other.
3387
- reduction (str, optional): Type of reduction to be applied to loss. The optional value is "mean", "sum" or
3388
- "none". Default: "mean".
3973
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3974
+ ``'sum'`` . Default: ``'mean'`` .
3975
+
3976
+ - ``'none'``: no reduction will be applied.
3977
+ - ``'mean'``: compute and return the mean of elements in the output.
3978
+ - ``'sum'``: the output elements will be summed.
3389
3979
 
3390
3980
  Returns:
3391
- Tensor or Scalar, if `reduction` is "none", return a Tensor with same shape and dtype as `input`.
3981
+ Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
3392
3982
  Otherwise, a scalar value will be returned.
3393
3983
 
3394
3984
  Raises:
3395
3985
  TypeError: If `input` is not a Tensor.
3396
3986
  TypeError: If `target` is not a Tensor.
3397
- ValueError: If `reduction` is not one of "none", "mean" or "sum".
3987
+ ValueError: If `reduction` is not one of ``"none"``, ``"mean"`` or ``"sum"``.
3398
3988
 
3399
3989
  Supported Platforms:
3400
3990
  ``Ascend`` ``GPU`` ``CPU``
3401
3991
 
3402
3992
  Examples:
3993
+ >>> import mindspore as ms
+ >>> from mindspore import ops
3994
+ >>> from mindspore import dtype as mstype
3403
3995
  >>> x = ms.Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
3404
3996
  >>> target = ms.Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
3405
3997
  >>> output = ops.l1_loss(x, target, reduction="mean")
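As a quick check of the formula, the docstring example above works out to a mean absolute error of 3.0; a hedged NumPy equivalent:

import numpy as np

x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
target = np.array([[6, 5, 4], [3, 2, 1]], dtype=np.float32)
print(np.abs(x - target).mean())  # 3.0, matching reduction="mean"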
@@ -3441,22 +4033,27 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
3441
4033
  \end{cases}
3442
4034
 
3443
4035
  Here :math:`\text{beta}` controls the point where the loss function changes from quadratic to linear.
3444
- :math:`\text{beta}>0` , its default value is 1.0. :math:`N` is the batch size.
4036
+ :math:`\text{beta}>0` , its default value is ``1.0`` . :math:`N` is the batch size.
3445
4037
 
3446
4038
  Args:
3447
4039
  input (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
3448
4040
  target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
3449
4041
  beta (float): A parameter used to control the point where the function will change between
3450
- L1 to L2 loss. The value should be greater than zero. Default: 1.0.
3451
- reduction (str): Apply specific reduction method to the output: 'none', 'mean' or 'sum'. Default: 'none'.
4042
+ L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
4043
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4044
+ ``'sum'`` . Default: ``'none'`` .
4045
+
4046
+ - ``'none'``: no reduction will be applied.
4047
+ - ``'mean'``: compute and return the mean of elements in the output.
4048
+ - ``'sum'``: the output elements will be summed.
3452
4049
 
3453
4050
  Returns:
3454
- Tensor, if `reduction` is 'none', then output is a tensor with the same shape as `input`.
3455
- Otherwise, the shape of output tensor is `(1,)`.
4051
+ Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `input`.
4052
+ Otherwise, the shape of output tensor is :math:`(1,)`.
3456
4053
 
3457
4054
  Raises:
3458
4055
  TypeError: If `beta` is not a float.
3459
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
4056
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
3460
4057
  TypeError: If dtype of `input` or `target` is not one of float16, float32, float64.
3461
4058
  ValueError: If `beta` is less than or equal to 0.
3462
4059
  ValueError: If shape of `input` is not the same as `target`.
@@ -3465,6 +4062,9 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
3465
4062
  ``Ascend`` ``GPU`` ``CPU``
3466
4063
 
3467
4064
  Examples:
4065
+ >>> import mindspore
4066
+ >>> import numpy as np
4067
+ >>> from mindspore import Tensor, ops
3468
4068
  >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
3469
4069
  >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
3470
4070
  >>> output = ops.smooth_l1_loss(logits, labels)
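The piecewise rule above can be checked directly for the docstring example: with the default beta=1.0 and reduction='none', the element-wise losses are [0, 0, 0.5]. A hedged NumPy sketch:

import numpy as np

logits = np.array([1.0, 2.0, 3.0])
labels = np.array([1.0, 2.0, 2.0])
beta = 1.0
d = np.abs(logits - labels)
print(np.where(d < beta, 0.5 * d ** 2 / beta, d - 0.5 * beta))  # [0.  0.  0.5]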
@@ -3505,6 +4105,8 @@ def threshold(input, thr, value):
3505
4105
  ``Ascend`` ``GPU`` ``CPU``
3506
4106
 
3507
4107
  Examples:
4108
+ >>> import mindspore
4109
+ >>> from mindspore import Tensor, ops
3508
4110
  >>> inputs = mindspore.Tensor([0.0, 2, 3], mindspore.float32)
3509
4111
  >>> outputs = ops.threshold(inputs, 1, 100)
3510
4112
  >>> print(outputs)
@@ -3514,7 +4116,11 @@ def threshold(input, thr, value):
3514
4116
  _check_value_type("thr", thr, [float, int], "threshold")
3515
4117
  _check_value_type("value", value, [float, int], "threshold")
3516
4118
  cond = _get_cache_prim(P.Greater)()(input, thr)
3517
- value = _get_cache_prim(P.Fill)()(input.dtype, input.shape, value)
4119
+ input_type = input.dtype
4120
+ value = Tensor(value, input_type)
4121
+ input_shape = input.shape
4122
+ shape_tensor = _get_cache_prim(TupleToTensor)()(input_shape, mstype.int64)
4123
+ value = _get_cache_prim(P.FillV2)()(shape_tensor, value)
3518
4124
  return _get_cache_prim(P.Select)()(cond, input, value)
3519
4125
 
3520
4126
 
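The rewritten body above (Greater, then FillV2, then Select) is an element-wise keep-or-replace; a hedged NumPy equivalent of the same semantics:

import numpy as np

x = np.array([0.0, 2.0, 3.0], dtype=np.float32)
thr, value = 1, 100
print(np.where(x > thr, x, np.float32(value)))  # [100.   2.   3.]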
@@ -3536,7 +4142,7 @@ def leaky_relu(input, alpha=0.2):
3536
4142
  Args:
3537
4143
  input (Tensor): The input of leaky_relu is a Tensor of any dimension.
3538
4144
  alpha (Union[int, float]): Slope of the activation function when the element of `input` is less than 0.
3539
- Default: 0.2.
4145
+ Default: ``0.2`` .
3540
4146
 
3541
4147
  Returns:
3542
4148
  Tensor, has the same type and shape as the `input`.
@@ -3549,6 +4155,9 @@ def leaky_relu(input, alpha=0.2):
3549
4155
  ``Ascend`` ``GPU`` ``CPU``
3550
4156
 
3551
4157
  Examples:
4158
+ >>> import mindspore
4159
+ >>> import numpy as np
4160
+ >>> from mindspore import Tensor, ops
3552
4161
  >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
3553
4162
  >>> print(ops.leaky_relu(x, alpha=0.2))
3554
4163
  [[-0.2 4. -1.6]
@@ -3559,6 +4168,7 @@ def leaky_relu(input, alpha=0.2):
3559
4168
  select_op = _get_cache_prim(P.Maximum)()
3560
4169
  if alpha > 1:
3561
4170
  select_op = _get_cache_prim(P.Minimum)()
4171
+ alpha = _get_cache_prim(P.Cast)()(F.scalar_to_tensor(alpha), input.dtype)
3562
4172
  return select_op(alpha * input, input)
3563
4173
 
3564
4174
 
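The Maximum/Minimum selection above works because, for alpha <= 1, alpha*x >= x exactly when x < 0, so max(alpha*x, x) reproduces leaky ReLU; for alpha > 1 the inequality flips, hence the switch to Minimum. A hedged NumPy check:

import numpy as np

x = np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]])
alpha = 0.2
reference = np.where(x >= 0, x, alpha * x)               # textbook leaky ReLU
print(np.allclose(np.maximum(alpha * x, x), reference))  # True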
@@ -3587,6 +4197,9 @@ def intopk(x1, x2, k):
3587
4197
  ``Ascend`` ``GPU`` ``CPU``
3588
4198
 
3589
4199
  Examples:
4200
+ >>> import mindspore
4201
+ >>> import numpy as np
4202
+ >>> from mindspore import Tensor, ops
3590
4203
  >>> x1 = Tensor(np.array([[1, 8, 5, 2, 7], [4, 9, 1, 3, 5]]), mindspore.float32)
3591
4204
  >>> x2 = Tensor(np.array([1, 3]), mindspore.int32)
3592
4205
  >>> output = ops.intopk(x1, x2, 3)
@@ -3611,7 +4224,7 @@ def log_softmax(logits, axis=-1):
3611
4224
  Args:
3612
4225
  logits (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3613
4226
  additional dimensions, with float16 or float32 data type.
3614
- axis (int): The axis to perform the Log softmax operation. Default: -1.
4227
+ axis (int): The axis to perform the Log softmax operation. Default: ``-1`` .
3615
4228
 
3616
4229
  Returns:
3617
4230
  Tensor, with the same type and shape as the logits.
@@ -3620,11 +4233,15 @@ def log_softmax(logits, axis=-1):
3620
4233
  TypeError: If `axis` is not an int.
3621
4234
  TypeError: If dtype of `logits` is neither float16 nor float32.
3622
4235
  ValueError: If `axis` is not in range [-len(logits.shape), len(logits.shape)).
4236
+ ValueError: If dimension of `logits` is less than 1.
3623
4237
 
3624
4238
  Supported Platforms:
3625
4239
  ``Ascend`` ``GPU`` ``CPU``
3626
4240
 
3627
4241
  Examples:
4242
+ >>> import mindspore
4243
+ >>> import numpy as np
4244
+ >>> from mindspore import Tensor, ops
3628
4245
  >>> logits = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
3629
4246
  >>> output = ops.log_softmax(logits)
3630
4247
  >>> print(output)
@@ -3638,6 +4255,10 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
3638
4255
  r"""
3639
4256
  Local Response Normalization.
3640
4257
 
4258
+ .. warning::
4259
+ lrn is deprecated on Ascend due to potential accuracy problems. It's recommended to use other
4260
+ normalization methods, e.g. :class:`mindspore.ops.batch_norm`.
4261
+
3641
4262
  .. math::
3642
4263
 
3643
4264
  b_{c} = a_{c}\left(k + \frac{\alpha}{n}
@@ -3648,11 +4269,12 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
3648
4269
  where the :math:`\alpha` indicates the `alpha`; where the :math:`\beta` indicates the `beta`.
3649
4270
 
3650
4271
  Args:
3651
- depth_radius (int): Half-width of the 1-D normalization window with the shape of 0-D. Default: 5.
3652
- bias (float): An offset (usually positive to avoid dividing by 0). Default: 1.0.
3653
- alpha (float): A scale factor, usually positive. Default: 1.0.
3654
- beta (float): An exponent. Default: 0.5.
3655
- norm_region (str): Specifies normalization region. Options: "ACROSS_CHANNELS". Default: "ACROSS_CHANNELS".
4272
+ depth_radius (int): Half-width of the 1-D normalization window with the shape of 0-D. Default: ``5`` .
4273
+ bias (float): An offset (usually positive to avoid dividing by 0). Default: ``1.0`` .
4274
+ alpha (float): A scale factor, usually positive. Default: ``1.0`` .
4275
+ beta (float): An exponent. Default: ``0.5`` .
4276
+ norm_region (str): Specifies normalization region. Options: ``"ACROSS_CHANNELS"`` .
4277
+ Default: ``"ACROSS_CHANNELS"`` .
3656
4278
  x (Tensor): A 4-D Tensor with float16 or float32 data type.
3657
4279
 
3658
4280
  Returns:
@@ -3665,9 +4287,12 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
3665
4287
  TypeError: If `x` is not a Tensor.
3666
4288
 
3667
4289
  Supported Platforms:
3668
- ``Ascend`` ``GPU`` ``CPU``
4290
+ ``GPU`` ``CPU``
3669
4291
 
3670
4292
  Examples:
4293
+ >>> import mindspore
4294
+ >>> import numpy as np
4295
+ >>> from mindspore import Tensor, ops
3671
4296
  >>> input_x = Tensor(np.array([[[[0.1], [0.2]],
3672
4297
  ... [[0.3], [0.4]]]]), mindspore.float32)
3673
4298
  >>> output = ops.lrn(input_x)
@@ -3695,29 +4320,39 @@ def mish(x):
3695
4320
  <https://arxiv.org/abs/1908.08681>`_.
3696
4321
 
3697
4322
  Args:
3698
- x (Tensor): Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
3699
- additional dimensions, with float16 or float32 data type.
4323
+ x (Tensor): The input Tensor.
4324
+ Supported dtypes:
4325
+
4326
+ - GPU/CPU: float16, float32, float64.
4327
+ - Ascend: float16, float32.
3700
4328
 
3701
4329
  Returns:
3702
4330
  Tensor, with the same type and shape as the `x`.
3703
4331
 
3704
4332
  Raises:
3705
- TypeError: If dtype of `x` is neither float16 nor float32.
4333
+ TypeError: If dtype of `x` is not float16, float32 or float64.
3706
4334
 
3707
4335
  Supported Platforms:
3708
4336
  ``Ascend`` ``GPU`` ``CPU``
3709
4337
 
3710
4338
  Examples:
4339
+ >>> import mindspore
4340
+ >>> import numpy as np
4341
+ >>> from mindspore import Tensor, ops
3711
4342
  >>> input_x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
3712
4343
  >>> output = ops.mish(input_x)
3713
4344
  >>> print(output)
3714
4345
  [[-3.0340147e-01 3.9974129e+00 -2.68311895e-03]
3715
4346
  [ 1.9439590e+00 -3.3576239e-02 8.99999990e+00]]
4347
+ >>> input_x = Tensor(2.1, mindspore.float32)
4348
+ >>> output = ops.mish(input_x)
4349
+ >>> print(output)
4350
+ 2.050599
3716
4351
  """
3717
4352
  return mish_(x)
3718
4353
 
3719
4354
 
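The scalar example added above can be reproduced from the Mish definition mish(x) = x * tanh(softplus(x)) given in the cited paper; a hedged NumPy check:

import numpy as np

x = 2.1
print(x * np.tanh(np.log1p(np.exp(x))))  # ~2.0506, matching the example output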
3720
- @constexpr
4355
+ @_primexpr
3721
4356
  def _check_value_type(arg_name, arg_value, valid_types, prim_name=None):
3722
4357
  """Checks whether a value is instance of some types."""
3723
4358
  return validator.check_value_type(arg_name, arg_value, valid_types, prim_name)
@@ -3726,8 +4361,8 @@ def _check_value_type(arg_name, arg_value, valid_types, prim_name=None):
3726
4361
  @constexpr(check=False)
3727
4362
  def _check_is_tensor(param_name, input_data, cls_name):
3728
4363
  """Internal function, used to check whether the input data is Tensor."""
3729
- if input_data is not None and not isinstance(ops.typeof(input_data), mstype.tensor_type):
3730
- raise TypeError(f"For '{cls_name}', the '{param_name}' must be '{mstype.tensor_type}', "
4364
+ if input_data is not None and not isinstance(ops.typeof(input_data), mstype.TensorType):
4365
+ raise TypeError(f"For '{cls_name}', the '{param_name}' must be a Tensor, "
3731
4366
  f"but got '{ops.typeof(input_data)}'")
3732
4367
 
3733
4368
 
@@ -3790,10 +4425,54 @@ def _check_type_and_shape_same(param_name1, input_data1, param_name2, input_data
3790
4425
 
3791
4426
 
3792
4427
  def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
3793
- """
4428
+ r"""
3794
4429
  MarginRankingLoss creates a criterion that measures the loss.
3795
4430
 
3796
- For details, please refer to :class:`mindspore.nn.MarginRankingLoss`.
4431
+ Given two tensors :math:`input1`, :math:`input2` and a Tensor label :math:`target` with values 1 or -1,
4432
+ the operation is as follows:
4433
+
4434
+ .. math::
4435
+ \text{loss}(input1, input2, target) = \max(0, -target * (input1 - input2) + \text{margin})
4436
+
4437
+ Args:
4438
+ input1 (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
4439
+ input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
4440
+ target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
4441
+ :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_2, x_3, ..., x_R)`.
4442
+ margin (float, optional): Specify the adjustment factor of the operation. Default: ``0.0`` .
4443
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4444
+ ``'sum'`` . Default: ``'mean'`` .
4445
+
4446
+ - ``'none'``: no reduction will be applied.
4447
+ - ``'mean'``: compute and return the mean of elements in the output.
4448
+ - ``'sum'``: the output elements will be summed.
4449
+
4450
+ Returns:
4451
+ Tensor or Scalar. If `reduction` is ``"none"``, its shape is the same as `target`.
4452
+ Otherwise, a scalar value will be returned.
4453
+
4454
+ Raises:
4455
+ TypeError: If `margin` is not a float.
4456
+ TypeError: If `input1`, `input2` or `target` is not a Tensor.
4457
+ TypeError: If the types of `input1` and `input2` are inconsistent.
4458
+ TypeError: If the types of `input1` and `target` are inconsistent.
4459
+ ValueError: If the shape of `input1` and `input2` are inconsistent.
4460
+ ValueError: If the shape of `input1` and `target` are inconsistent.
4461
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` , ``'sum'``.
4462
+
4463
+ Supported Platforms:
4464
+ ``Ascend`` ``GPU`` ``CPU``
4465
+
4466
+ Examples:
4467
+ >>> import mindspore as ms
4468
+ >>> from mindspore import Tensor, ops
4469
+ >>> import numpy as np
4470
+ >>> input1 = Tensor(np.array([0.3864, -2.4093, -1.4076]), ms.float32)
4471
+ >>> input2 = Tensor(np.array([-0.6012, -1.6681, 1.2928]), ms.float32)
4472
+ >>> target = ops.Sign()(Tensor(np.array([-2, -2, 3]), ms.float32))
4473
+ >>> output = ops.margin_ranking_loss(input1, input2, target)
4474
+ >>> print(output)
4475
+ 1.2293333
3797
4476
  """
3798
4477
  margin = _check_value_type("margin", margin, [float], "margin_ranking_loss")
3799
4478
  _check_is_tensor('input1', input1, "margin_ranking_loss")
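The docstring example above can be reproduced directly from the formula; a hedged NumPy check (margin left at its default 0.0):

import numpy as np

input1 = np.array([0.3864, -2.4093, -1.4076])
input2 = np.array([-0.6012, -1.6681, 1.2928])
target = np.sign(np.array([-2.0, -2.0, 3.0]))
loss = np.maximum(0.0, -target * (input1 - input2) + 0.0)
print(loss.mean())  # ~1.2293, matching the default 'mean' reduction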
@@ -3830,23 +4509,30 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
3830
4509
  input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
3831
4510
  target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
3832
4511
  :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_3, x_4, ..., x_R)`.
3833
- margin (float, optional): Should be in [-1.0, 1.0]. Default 0.0.
3834
- reduction (str, optional): Specifies which reduction to be applied to the output. It must be one of
3835
- "none", "mean", and "sum", meaning no reduction, reduce mean and sum on output, respectively. Default "mean".
4512
+ margin (float, optional): Should be in [-1.0, 1.0]. Default: ``0.0`` .
4513
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4514
+ ``'sum'`` . Default: ``'mean'`` .
4515
+
4516
+ - ``'none'``: no reduction will be applied.
4517
+ - ``'mean'``: compute and return the mean of elements in the output.
4518
+ - ``'sum'``: the output elements will be summed.
3836
4519
 
3837
4520
  Returns:
3838
- Tensor or Scalar, if `reduction` is "none", its shape is the same as `target`.
4521
+ Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `target`.
3839
4522
  Otherwise, a scalar value will be returned.
3840
4523
 
3841
4524
  Raises:
3842
4525
  TypeError: If `margin` is not a float.
3843
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
4526
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
3844
4527
  ValueError: If `margin` is not in range [-1, 1].
3845
4528
 
3846
4529
  Supported Platforms:
3847
4530
  ``Ascend`` ``GPU`` ``CPU``
3848
4531
 
3849
4532
  Examples:
4533
+ >>> import mindspore
4534
+ >>> import numpy as np
4535
+ >>> from mindspore import Tensor, ops
3850
4536
  >>> intput1 = Tensor(np.array([[0.3, 0.8], [0.4, 0.3]]), mindspore.float32)
3851
4537
  >>> intput2 = Tensor(np.array([[0.4, 1.2], [-0.4, -0.9]]), mindspore.float32)
3852
4538
  >>> target = Tensor(np.array([1, -1]), mindspore.int32)
@@ -3909,13 +4595,14 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
3909
4595
  three int numbers that represent depth, height and width respectively.
3910
4596
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
3911
4597
  the depth, height and width of movement are both stride, or a tuple of three int numbers that
3912
- represent depth, height and width of movement respectively. Default: `kernel_size`.
4598
+ represent depth, height and width of movement respectively.
4599
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
3913
4600
  padding (Union[int, tuple[int]]): An int number that represents the depth, height and width of movement are both
3914
4601
  strides, or a tuple of three int numbers that represent depth, height and width of movement respectively.
3915
- Default: 0.
3916
- dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: 1.
3917
- ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: False.
3918
- return_indices (bool): Whether to output the indices of max value. Default: False.
4602
+ Default: ``0`` .
4603
+ dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: ``1`` .
4604
+ ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .
4605
+ return_indices (bool): Whether to output the indices of max value. Default: ``False`` .
3919
4606
 
3920
4607
  Returns:
3921
4608
  If `return_indices` is False, return a Tensor `output`, else return a tuple (`output`, `argmax`).
@@ -3923,7 +4610,7 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
3923
4610
  - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`.
3924
4611
  It has the same data type as `x`.
3925
4612
  - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be returned
3926
- only when `return_indices` is True.
4613
+ only when `return_indices` is ``True`` .
3927
4614
 
3928
4615
  Raises:
3929
4616
  TypeError: If `x` is not a Tensor.
@@ -3936,6 +4623,9 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
3936
4623
  ``Ascend`` ``GPU`` ``CPU``
3937
4624
 
3938
4625
  Examples:
4626
+ >>> import mindspore
4627
+ >>> import numpy as np
4628
+ >>> from mindspore import Tensor, ops
3939
4629
  >>> x = Tensor(np.arange(2 * 1 * 2 * 2 * 2).reshape((2, 1, 2, 2, 2)), mindspore.float32)
3940
4630
  >>> output_tensor, argmax = ops.max_pool3d(x, kernel_size=2, stride=1, padding=1, return_indices=True)
3941
4631
  >>> print(output_tensor.shape)
@@ -3963,8 +4653,8 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
3963
4653
  For each output location `output[n, :, h, w]`, the size-2 vector `grid[n, h, w]` specifies `input` pixel
3964
4654
  locations `x` and `y`, which are used to interpolate the output value `output[n, :, h, w]`. In the case of 5D
3965
4655
  inputs, `grid[n, d, h, w]`, specifies the `x`, `y`, `z` pixel locations for interpolating
3966
- `output[n, :, d, h, w]`. And `mode` argument specifies "nearest" or "bilinear" or "bicubic"
3967
- (supported in 4D case only) interpolation method to sample the input pixels.
4656
+ `output[n, :, d, h, w]`. The `mode` argument specifies the "nearest" or "bilinear" ("bicubic" is not supported yet)
4657
+ interpolation method to sample the input pixels.
3968
4658
 
3969
4659
  `grid` specifies the sampling pixel locations normalized by the `input` spatial dimensions. Therefore, it should
3970
4660
  have most values in the range of :math:`[-1, 1]`.
@@ -3981,15 +4671,26 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
3981
4671
  grid (Tensor): flow-field with shape of :math:`(N, H_{out}, W_{out}, 2)` (4-D case) or :math:`(N, D_{out},
3982
4672
  H_{out}, W_{out}, 3)` (5-D case) and same dtype as `input`.
3983
4673
  mode (str): An optional string specifying the interpolation method. The optional values are
3984
- "bilinear", "nearest" or "bicubic". Default: "bilinear". Note: `bicubic` supports only 4-D input. When
4674
+ ``'bilinear'`` , ``'nearest'`` . Note: `bicubic` is not supported yet. When
3985
4675
  `mode="bilinear"` and the input is 5-D, the interpolation mode used internally will actually
3986
4676
  be trilinear. However, when the input is 4-D, the interpolation mode will legitimately be bilinear.
4677
+ Default: ``'bilinear'`` .
4678
+
4679
+ - ``'nearest'``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
4680
+ nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
4681
+ - ``'bilinear'``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
4682
+ pixels, computed using bilinear interpolation. This method produces smoother results compared
4683
+ to nearest neighbor interpolation.
4684
+ - ``'trilinear'``: Trilinear interpolation. This is an extension of bilinear interpolation to 3D data.
4685
+ It performs bilinear interpolation in the two spatial dimensions and linear interpolation along
4686
+ the third dimension. It is commonly used for volume or 3D image interpolation.
4687
+
3987
4688
  padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
3988
- "reflection". Default: "zeros".
4689
+ "reflection". Default: ``'zeros'`` .
3989
4690
  align_corners (bool): An optional bool. If set to `True`, the extrema (-1 and 1) are considered as referring to
3990
4691
  the center points of the input’s corner pixels. If set to `False`, they are instead considered as referring
3991
4692
  to the corner points of the input’s corner pixels, making the sampling more resolution agnostic. Default:
3992
- `False`.
4693
+ ``False`` .
3993
4694
 
3994
4695
  Returns:
3995
4696
  Tensor, dtype is the same as `input` and whose shape is :math:`(N, C, H_{out}, W_{out})` (4-D) and
@@ -4003,13 +4704,15 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner
4003
4704
  ValueError: If the rank of `input` or `grid` is not equal to 4(4-D case) or 5(5-D case).
4004
4705
  ValueError: If the first dimension of `input` is not equal to that of `grid`.
4005
4706
  ValueError: If the last dimension of `grid` is not equal to 2(4-D case) or 3(5-D case).
4006
- ValueError: If `mode` is not "bilinear", "nearest", "bicubic" or a string value.
4707
+ ValueError: If `mode` is not "bilinear", "nearest" or a string value.
4007
4708
  ValueError: If `padding_mode` is not "zeros", "border", "reflection" or a string value.
4008
4709
 
4009
4710
  Supported Platforms:
4010
4711
  ``Ascend`` ``GPU`` ``CPU``
4011
4712
 
4012
4713
  Examples:
4714
+ >>> import numpy as np
4715
+ >>> from mindspore import Tensor, ops
4013
4716
  >>> input_x = Tensor(np.arange(16).reshape((2, 2, 2, 2)).astype(np.float32))
4014
4717
  >>> grid = Tensor(np.arange(0.2, 1, 0.1).reshape((2, 2, 1, 2)).astype(np.float32))
4015
4718
  >>> output = ops.grid_sample(input_x, grid, mode='bilinear', padding_mode='zeros',
@@ -4061,11 +4764,15 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4061
4764
  targets (Tensor): Target sequences. A tensor of shape :math:`(N, S)`, where S is max target length.
4062
4765
  input_lengths (Union(tuple, Tensor)): Lengths of the input. A tuple or Tensor of shape(N).
4063
4766
  target_lengths (Union(tuple, Tensor)): Lengths of the target. A tuple or Tensor of shape(N).
4064
- blank (int, optional): The blank label. Default: 0.
4065
- reduction (str, optional): Implements the reduction method to the output with 'none', 'mean', or 'sum',
4066
- respectively indicate that no calculation is specified, that the mean is used, and that is calculated
4067
- using summation. Default: 'mean'.
4068
- zero_infinity (bool, optional): Whether to set infinite loss and correlation gradient to 0. Default: False.
4767
+ blank (int, optional): The blank label. Default: ``0`` .
4768
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4769
+ ``'sum'`` . Default: ``'mean'`` .
4770
+
4771
+ - ``'none'``: no reduction will be applied.
4772
+ - ``'mean'``: compute and return the mean of elements in the output.
4773
+ - ``'sum'``: the output elements will be summed.
4774
+
4775
+ zero_infinity (bool, optional): Whether to set infinite loss and correlation gradient to 0. Default: ``False`` .
4069
4776
 
4070
4777
  Returns:
4071
4778
  neg_log_likelihood (Tensor), A loss value with shape :math:`(N)` , which is differentiable with respect to
@@ -4081,7 +4788,6 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4081
4788
  ValueError: If the rank of `targets` is not 2.
4082
4789
  ValueError: If the shape of `input_lengths` does not match N. N is batch size of `log_probs` .
4083
4790
  ValueError: If the shape of `target_lengths` does not match N. N is batch size of `log_probs` .
4084
- TypeError: If the types of `targets`, `input_lengths` or `target_lengths` are different.
4085
4791
  ValueError: If the value of `blank` is not in range [0, num_labels|C). C is number of classes of `log_probs` .
4086
4792
  RuntimeError: If any value of `input_lengths` is larger than T. T is the length of `log_probs`.
4087
4793
  RuntimeError: If any target_lengths[i] is not in range [0, input_length[i]].
@@ -4090,6 +4796,9 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
4090
4796
  ``Ascend`` ``GPU`` ``CPU``
4091
4797
 
4092
4798
  Examples:
4799
+ >>> import numpy as np
4800
+ >>> from mindspore import Tensor, ops
4801
+ >>> from mindspore import dtype as mstype
4093
4802
  >>> log_probs = Tensor(np.array([[[0.3, 0.6, 0.6]],
4094
4803
  ... [[0.9, 0.4, 0.2]]]).astype(np.float32))
4095
4804
  >>> targets = Tensor(np.array([[0, 1]]), mstype.int32)
@@ -4131,11 +4840,9 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4131
4840
  \ \text{eps}\right)\right) + \frac{\left(\text{x} - \text{target}\right)^2}
4132
4841
  {\text{max}\left(\text{var}, \ \text{eps}\right)}\right) + \text{const.}
4133
4842
 
4134
- where :math:`eps` is used for stability of :math:`log`. When :math:`full=True`,
4135
- a constant will be added to the loss. If
4136
- the shape of :math:`var` and :math:`logits` are not the same (due to a homoscedastic assumption),
4137
- their shapes must allow
4138
- correct broadcasting.
4843
+ where :math:`eps` is used for stability of :math:`log`. When :math:`full=True`, a constant will be added to the
4844
+ loss. If the shapes of :math:`var` and :math:`x` are not the same (due to a homoscedastic assumption),
4845
+ their shapes must allow correct broadcasting.
4139
4846
 
4140
4847
  Args:
4141
4848
  x (Tensor): Tensor of shape :math:`(N, *)` or :math:`(*)` where :math:`*` means any number of
@@ -4146,18 +4853,20 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4146
4853
  but with one dimension equal to 1, or same shape as the x but with one fewer dimension
4147
4854
  (to allow for broadcasting).
4148
4855
  full (bool, optional): Include the constant term in the loss calculation. When :math:`full=True`,
4149
- the constant term will be :math:`const = 0.5*log(2\pi)`. Default: False.
4150
- eps (float, optional): Used to improve the stability of log function must be greater than 0. Default: 1e-6.
4151
- reduction (str, optional): Apply specific reduction method to the
4152
- output: "none", "mean", or "sum". Default: "mean".
4856
+ the constant term will be :math:`const = 0.5*log(2\pi)`. Default: ``False``.
4857
+ eps (float, optional): Used to improve the stability of the log function, must be greater than 0. Default: ``1e-6`` .
4858
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4859
+ ``'sum'`` . Default: ``'mean'`` .
4860
+
4861
+ - ``'none'``: no reduction will be applied.
4862
+ - ``'mean'``: compute and return the mean of elements in the output.
4863
+ - ``'sum'``: the output elements will be summed.
4153
4864
 
4154
4865
  Returns:
4155
4866
  Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
4156
4867
 
4157
4868
  Raises:
4158
- TypeError: If `x` is not a Tensor.
4159
- TypeError: If `target` is not a Tensor.
4160
- TypeError: If `var` is not a Tensor.
4869
+ TypeError: If `x`, `target` or `var` is not a Tensor.
4161
4870
  TypeError: If `full` is not a bool.
4162
4871
  TypeError: If `eps` is not a float.
4163
4872
  ValueError: If `eps` is not a float within (0, inf).
@@ -4168,8 +4877,7 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4168
4877
 
4169
4878
  Examples:
4170
4879
  >>> import numpy as np
4171
- >>> from mindspore import Tensor
4172
- >>> import mindspore.ops as ops
4880
+ >>> from mindspore import Tensor, ops
4173
4881
  >>> import mindspore.common.dtype as mstype
4174
4882
  >>> arr1 = np.arange(8).reshape((4, 2))
4175
4883
  >>> arr2 = np.array([2, 3, 1, 4, 6, 4, 4, 9]).reshape((4, 2))
@@ -4199,6 +4907,14 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4199
4907
  if reduction not in ('none', 'mean', 'sum'):
4200
4908
  raise ValueError(f"For 'gaussian_nll_loss', 'reduction' must be one of 'none', 'mean', or 'sum',\
4201
4909
  but got {reduction}.")
4910
+ if not x.shape == var.shape:
4911
+ if x.shape[:-1] == var.shape:
4912
+ var = var.unsqueeze(dim=-1)
4913
+ # Heteroscedastic case
4914
+ elif x.shape[:-1] == var.shape[:-1] and var.shape[-1] == 1:
4915
+ pass
4916
+ else:
4917
+ raise ValueError(f"For 'gaussian_nll_loss', 'var' must be able to correctly broadcast to 'x' and 'target'.")
4202
4918
  max_op = P.Maximum()
4203
4919
  log_op = P.Log()
4204
4920
  square_op = P.Square()
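The shape handling added above admits exactly three `var` layouts relative to `x`: the same shape, `x.shape[:-1]` (which is then unsqueezed back), or the same shape with a trailing dimension of 1. A hedged NumPy sketch of those cases for an `x` of shape (4, 2):

import numpy as np

x = np.zeros((4, 2))
for shape in [(4, 2), (4,), (4, 1)]:
    var = np.ones(shape)
    if var.shape == x.shape[:-1]:
        var = var[..., None]                                # mirrors var.unsqueeze(dim=-1) above
    print(shape, np.broadcast_shapes(x.shape, var.shape))   # all broadcast to (4, 2)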
@@ -4215,9 +4931,30 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
4215
4931
 
4216
4932
 
4217
4933
  @_primexpr
4218
- def _check_hinge_embedding_loss(shape, shape2, prim_name):
4934
+ def _check_hinge_embedding_loss(shape, shape2):
4219
4935
  if shape2 != shape:
4220
- raise ValueError(f"For '{prim_name}' the input tensor and the labels must have the same shape.")
4936
+ raise ValueError(f"For 'HingeEmbeddingLoss' the input tensor and the labels must have the same shape.")
4937
+
4938
+
4939
+ @_primexpr
4940
+ def _check_hinge_embedding_loss_type(inputs_dtype, targets_dtype, inputs, targets, margin, reduction):
4941
+ """Check hinge embedding loss type."""
4942
+ if not isinstance(margin, (float, int)):
4943
+ raise TypeError(f"For 'HingeEmbeddingLoss', 'margin' must be a float or int, but got {type(margin)}.")
4944
+ if reduction not in ['none', 'mean', 'sum']:
4945
+ raise ValueError(f"For 'HingeEmbeddingLoss', 'reduction' must be one of 'none', 'mean', 'sum',"
4946
+ f"but got {reduction}.")
4947
+ if not isinstance(inputs, Tensor):
4948
+ raise TypeError(f"For 'HingeEmbeddingLoss', the first input must be a Tensor, but got {type(inputs)}.")
4949
+ if not isinstance(targets, Tensor):
4950
+ raise TypeError(f"For 'HingeEmbeddingLoss', the second input must be a Tensor, but got {type(targets)}.")
4951
+
4952
+ if inputs_dtype not in mstype.float_type:
4953
+ raise TypeError(f"For 'HingeEmbeddingLoss', the dtype of the first input must be float, but got "
4954
+ f"{inputs_dtype}.")
4955
+ if targets_dtype not in mstype.float_type:
4956
+ raise TypeError(f"For 'HingeEmbeddingLoss', the dtype of the second input must be float, but got "
4957
+ f"{targets_dtype}.")
4221
4958
 
4222
4959
 
4223
4960
  def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
@@ -4247,9 +4984,13 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4247
4984
  targets (Tensor): Label values, represented as :math:`y` in the formula.
4248
4985
  Has the same shape as `inputs`, contains -1 or 1.
4249
4986
  margin (float, int): Threshold defined by Hinge Embedding Loss :math:`margin`.
4250
- Represented as :math:`\Delta` in the formula. Default: 1.0.
4251
- reduction (str): Specify the computing method to be applied to the outputs: 'none', 'mean', or 'sum'.
4252
- Default: 'mean'.
4987
+ Represented as :math:`\Delta` in the formula. Default: ``1.0`` .
4988
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4989
+ ``'sum'`` . Default: ``'mean'`` .
4990
+
4991
+ - ``'none'``: no reduction will be applied.
4992
+ - ``'mean'``: compute and return the mean of elements in the output.
4993
+ - ``'sum'``: the output elements will be summed.
4253
4994
 
4254
4995
  Returns:
4255
4996
  Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
@@ -4259,7 +5000,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4259
5000
  TypeError: If `targets` is not a Tensor.
4260
5001
  TypeError: If `margin` is not a float or int.
4261
5002
  ValueError: If `targets` does not have the same shape as `inputs` or they could not broadcast to each other.
4262
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
5003
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
4263
5004
 
4264
5005
  Supported Platforms:
4265
5006
  ``Ascend`` ``GPU`` ``CPU``
@@ -4277,30 +5018,12 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
4277
5018
  >>> print(loss)
4278
5019
  0.16666666
4279
5020
  """
4280
- def _check(inputs_dtype):
4281
- targets_dtype = targets.dtype
4282
- if not isinstance(margin, (float, int)):
4283
- raise TypeError(f"For 'HingeEmbeddingLoss', 'margin' must be a float or int, but got {type(margin)}.")
4284
- if reduction not in ['none', 'mean', 'sum']:
4285
- raise ValueError(f"For 'HingeEmbeddingLoss', 'reduction' must be one of 'none', 'mean', 'sum',"
4286
- f"but got {reduction}.")
4287
- if not isinstance(inputs, Tensor):
4288
- raise TypeError(f"For 'HingeEmbeddingLoss', the first input must be a Tensor, but got {type(inputs)}.")
4289
- if not isinstance(targets, Tensor):
4290
- raise TypeError(f"For 'HingeEmbeddingLoss', the second input must be a Tensor, but got {type(targets)}.")
4291
-
4292
- if inputs_dtype not in mstype.float_type:
4293
- raise TypeError(f"For 'HingeEmbeddingLoss', the dtype of the first input must be float, but got "
4294
- f"{inputs_dtype}.")
4295
- if targets_dtype not in mstype.float_type:
4296
- raise TypeError(f"For 'HingeEmbeddingLoss', the dtype of the second input must be float, but got "
4297
- f"{targets_dtype}.")
4298
-
4299
5021
  inputs_dtype = inputs.dtype
4300
- _check(inputs_dtype)
5022
+ targets_dtype = targets.dtype
5023
+ _check_hinge_embedding_loss_type(inputs_dtype, targets_dtype, inputs, targets, margin, reduction)
4301
5024
  _shape = inputs.shape
4302
5025
  _t_shape = targets.shape
4303
- _check_hinge_embedding_loss(_shape, _t_shape, 'HingeEmbeddingLoss')
5026
+ _check_hinge_embedding_loss(_shape, _t_shape)
4304
5027
 
4305
5028
  min_val = Tensor(0, inputs_dtype)
4306
5029
  pos_index = targets > 0
@@ -4323,6 +5046,9 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
4323
5046
  r"""
4324
5047
  Performs greedy decoding on the logits given in inputs.
4325
5048
 
5049
+ Note:
5050
+ On Ascend, 'merge_repeated' cannot be set to False.
5051
+
4326
5052
  Args:
4327
5053
  inputs (Tensor): The input Tensor must be a 3-D tensor whose shape is
4328
5054
  :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes,
@@ -4330,7 +5056,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
4330
5056
  Default blank label is `num_classes - 1`. Data type must be float32 or float64.
4331
5057
  sequence_length (Tensor): A tensor containing sequence lengths with the shape of :math:`(batch\_size, )`.
4332
5058
  The type must be int32. Each value in the tensor must be equal to or less than `max_time`.
4333
- merge_repeated (bool): If true, merge repeated classes in output. Default: True.
5059
+ merge_repeated (bool): If ``True`` , merge repeated classes in output. Default: ``True`` .
4334
5060
 
4335
5061
  Returns:
4336
5062
  decoded_indices (Tensor), A tensor with shape of :math:`(total\_decoded\_outputs, 2)`.
@@ -4355,6 +5081,9 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
4355
5081
  ``Ascend`` ``GPU`` ``CPU``
4356
5082
 
4357
5083
  Examples:
5084
+ >>> import mindspore
5085
+ >>> import numpy as np
5086
+ >>> from mindspore import Tensor, ops
4358
5087
  >>> inputs = Tensor(np.array([[[0.6, 0.4, 0.2], [0.8, 0.6, 0.3]],
4359
5088
  ... [[0.0, 0.6, 0.0], [0.5, 0.4, 0.5]]]), mindspore.float32)
4360
5089
  >>> sequence_length = Tensor(np.array([2, 2]), mindspore.int32)
@@ -4442,6 +5171,9 @@ def conv3d_transpose(inputs, weight, pad_mode='valid', padding=0, stride=1, dila
4442
5171
  TypeError: If data type of dout and weight is not float16.
4443
5172
 
4444
5173
  Examples:
5174
+ >>> import mindspore
5175
+ >>> import numpy as np
5176
+ >>> from mindspore import Tensor
4445
5177
  >>> dout = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float16)
4446
5178
  >>> weight = Tensor(np.ones([16, 3, 4, 6, 2]), mindspore.float16)
4447
5179
  >>> output = conv3d_transpose(dout, weight)
@@ -4472,87 +5204,111 @@ def _manipulate_padding(padding, dim):
4472
5204
  return ms_padding
4473
5205
 
4474
5206
 
4475
- def _manipulate_dilation(dilation, dim=1):
4476
- """convert 1d dilation to 2d"""
4477
- if isinstance(dilation, int):
4478
- return 1, dilation
4479
- if isinstance(dilation, (tuple, list)):
4480
- if len(dilation) != 1:
4481
- raise ValueError(f"For 'conv{dim}d', dilation must be a tuple/list with 1 element or int, \
4482
- but got {dilation}.")
4483
- return 1, dilation[0]
4484
- return dilation
5207
+ def _dim_manipulation(x, name):
5208
+ """convert 1d dilation, stride, etc. to 2d"""
5209
+ if isinstance(x, int):
5210
+ if x <= 0:
5211
+ raise ValueError(f"For 'conv1d', {name} must be a positive int, but got {x}.")
5212
+ return 1, x
5213
+ if isinstance(x, (tuple, list)):
5214
+ if len(x) != 1:
5215
+ raise ValueError(f"For 'conv1d', {name} must be a tuple/list with 1 element or int, but got {x}.")
5216
+ if x[0] <= 0:
5217
+ raise ValueError(f"For 'conv1d', elements in {name} must be positive int, but got {x}.")
5218
+ return 1, x[0]
5219
+ raise ValueError(f"For 'conv1d', {name} must be an int or a tuple/list with 1 element, but got {x}.")
5220
+
5221
+
5222
+ def _check_conv_iterable_lengths(iterable, dim, iter_name):
5223
+ """check iterables lengths used in conv functions"""
5224
+ if len(iterable) != dim:
5225
+ raise ValueError(f"For 'conv{dim}d', the {iter_name} must be a int or a tuple/list with length {dim}, "
5226
+ f"but got {iterable}.")
4485
5227
 
4486
5228
 
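The helpers above lift 1-D convolution parameters into the 2-D (height, width) form expected by the underlying Conv2D primitive; a standalone sketch of that lifting rule (hypothetical `lift_to_2d` name, same behaviour as `_dim_manipulation` minus the positivity checks):

def lift_to_2d(v):
    # an int or a 1-element tuple/list becomes the pair (1, v)
    if isinstance(v, int):
        return 1, v
    if isinstance(v, (tuple, list)) and len(v) == 1:
        return 1, v[0]
    raise ValueError(f"expected an int or a 1-element tuple/list, got {v!r}")

print(lift_to_2d(2), lift_to_2d((3,)))  # (1, 2) (1, 3)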
4487
5229
  def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
4488
5230
  r"""
4489
- Applies a 1D convolution over an input tensor.
4490
- The input tensor is typically of shape :math:`(N, C_{in}, W_{in})`,
4491
- where :math:`N` is batch size, :math:`C_{in}` is channel number, :math:`W` is width, :math:`X_i` is
4492
- the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
4493
- For each batch of shape :math:`(C_{in}, W_{in})`, the formula is defined as:
5231
+ Applies a 1D convolution over an input tensor. The input Tensor is typically
5232
+ of shape :math:`(N, C_{in}, L_{in})`,
5233
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is input sequence width.
5234
+
5235
+ The output is calculated based on formula:
4494
5236
 
4495
5237
  .. math::
4496
5238
 
4497
- out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{j}, X_i) + b_j,
5239
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5240
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5241
+
5242
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5243
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5244
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5245
+
5246
+ Here are the indices' meanings:
5247
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5248
+
5249
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5250
+ output channels, which is also equal to the number of kernels.
4498
5251
 
4499
- where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ operator,
4500
- :math:`C_{in}` is the input channel number, :math:`j` ranges
4501
- from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
4502
- filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{j}` is a slice
4503
- of kernel, and it has shape :math:`(\text{kernal_size})`, where :math:`\text{kernel_size}` is the width of
4504
- the convolution kernel. The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
4505
- where `groups` is the group number to split the input in the channel dimension.
5252
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5253
+ input channels, which is also equal to the number of channels in the convolutional kernels.
4506
5254
 
4507
- If the `pad_mode` is set to be "valid", the output width will be :math:`\left \lfloor{
4508
- 1 + \frac{W_{in} + \text{padding[0]} - \text{kernel_size} - (\text{kernel_size} - 1) \times(\text{dilation} - 1)}
4509
- {\text { stride }}} \right \rfloor`.
5255
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5256
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5257
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5258
+ channel in the :math:`i`-th batch of the input feature map.
4510
5259
 
4511
- where :math:`dilation` is spacing between kernel elements, :math:`stride` is The step length of each step,
4512
- :math:`padding` is zero-padding added to both sides of the input.
4513
- For output width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv1d
4514
- <https://www.mindspore.cn/docs/en/r2.0/api_python/nn/mindspore.nn.Conv2d.html>`_.
5260
+ The shape of the convolutional kernel is given by :math:`(kernel\_size)`,
5261
+ where :math:`kernel\_size` is the width of the kernel.
5262
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5263
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
5264
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
4515
5265
 
4516
- The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
4517
- <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
4518
- `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5266
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5267
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
5268
+ and `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
4519
5269
 
4520
5270
  Note:
4521
5271
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
4522
5272
  That is, when `groups>1`, condition `C_{in}` = `C_{out}` = `groups` must be satisfied.
4523
5273
 
4524
5274
  Args:
4525
- input (Tensor): Tensor of shape :math:`(N, C_{in}, W_{in})`.
4526
- weight (Tensor): Tensor of shape
4527
- :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`, then the size of kernel is
4528
- :math:`(\text{kernel_size})`.
4529
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`. When bias is None, zeros will be used. Default: None.
5275
+ input (Tensor): Input Tensor of shape :math:`(N, C_{in}, L_{in})`.
5276
+ weight (Tensor): The convolutional kernel value, it should have shape
5277
+ :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`.
5278
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5279
+ When bias is None, zeros will be used. Default: ``None`` .
4530
5280
  stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number or a tuple of one int
4531
- that represents width of movement. Default: 1.
5281
+ that represents width of movement. Default: ``1``.
4532
5282
  pad_mode (str, optional): Specifies padding mode. The optional values are
4533
- "same", "valid" and "pad". Default: "valid".
5283
+ ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
4534
5284
 
4535
- - same: Adopts the way of completion. The height and width of the output will be equal to
5285
+ - ``"same"``: Adopts the way of completion. The height and width of the output will be equal to
4536
5286
  the input `x` divided by stride. The padding will be evenly calculated in left and right possiblily.
4537
5287
  Otherwise, the last extra padding will be calculated from the right side.
4538
5288
  If this mode is set, `padding` must be 0.
4539
5289
 
4540
- - valid: Adopts the way of discarding. The possible largest width of output will be returned
5290
+ - ``"valid"``: Adopts the way of discarding. The possible largest width of output will be returned
4541
5291
  without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.
4542
5292
 
4543
- - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the input
5293
+ - ``"pad"``: Implicit paddings on both sides of the input `x`.
5294
+ The number of `padding` will be padded to the input
4544
5295
  Tensor borders. `padding` must be greater than or equal to 0.
4545
- padding (Union(int, tuple[int]), optional): Implicit paddings on both sides of `input`, meaning the paddings of
4546
- left and right are the same, equal to padding or padding[0] when padding is a tuple of 1 integer.
4547
- Default: 0.
4548
- dilation (Union(int, tuple[int]), optional): Gaps between kernel elements. The data type is int or a tuple of
4549
- 1 integer. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
4550
- there will be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater than or
4551
- equal to 1 and bounded by the width of `input`. Default: 1.
4552
- groups (int, optional): Splits `input` into groups. Default: 1.
5296
+ padding (Union(int, tuple[int], list[int]), optional): Specifies the amount of padding to apply on
5297
+ both sides of `input` when `pad_mode` is set to ``"pad"``. The
5298
+ paddings of left and right are the same, equal to padding or padding[0] when padding is a tuple of
5299
+ 1 integer. Default: ``0`` .
5300
+ dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
5301
+ It can be a single int or a tuple of 1 integer.
5302
+ Assuming :math:`dilation=(d0,)`, the convolutional kernel samples the input with a
5303
+ spacing of :math:`d0-1` elements in the width direction.
5304
+ The value should be in the range [1, L].
5305
+ Default: ``1`` .
5306
+ groups (int, optional): Splits `input` into groups. Default: ``1`` .
4553
5307
 
4554
5308
  Returns:
4555
- Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, W_{out})`.
5309
+ Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
5310
+ To see how different pad modes affect the output shape, please refer to
5311
+ :class:`mindspore.nn.Conv1d` for more details.
4556
5312
 
4557
5313
  Raises:
4558
5314
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -4565,16 +5321,23 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4565
5321
  ValueError: If `pad_mode` is not equal to 'pad' and `padding` is greater than 0.
4566
5322
 
4567
5323
  Supported Platforms:
4568
- ``Ascend`` ``GPU`` ``CPU``
5324
+ ``Ascend`` ``GPU``
4569
5325
 
4570
5326
  Examples:
5327
+ >>> import mindspore
5328
+ >>> import numpy as np
5329
+ >>> from mindspore import Tensor, ops
4571
5330
  >>> x = Tensor(np.arange(64).reshape((4, 4, 4)), mindspore.float32)
4572
- >>> weight = Tensor(np.arange(8).rehspe((2, 2, 2)), mindspore.float32)
5331
+ >>> weight = Tensor(np.arange(8).reshape((2, 2, 2)), mindspore.float32)
4573
5332
  >>> bias = Tensor([-0.12345, 2.7683], mindspore.float32)
4574
5333
  >>> output = ops.conv1d(x, weight, pad_mode='pad', padding=(1,), bias=bias, groups=2)
4575
5334
  >>> print(output.shape)
4576
5335
  (4, 2, 5)
4577
5336
  """
5337
+ if input.ndim != 3:
5338
+ raise ValueError(f"For 'conv1d', the input must be a 3D Tensor, but got input of {input.ndim}D.")
5339
+ if weight.ndim != 3:
5340
+ raise ValueError(f"For 'conv1d', the weight must be a 3D Tensor, but got input of {weight.ndim}D.")
4578
5341
  _expand = _get_cache_prim(P.ExpandDims)()
4579
5342
  expanded_input = _expand(input, 2)
4580
5343
  sqz = _get_cache_prim(P.Squeeze)(2)
@@ -4589,8 +5352,15 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4589
5352
  raise ValueError(f"For 'conv1d', padding must be a tuple or list with 1 element or int, but got {padding}.")
4590
5353
  padding = (0, 0, padding[0], padding[0])
4591
5354
  else:
4592
- raise ValueError(f"For 'conv1d', padding must be a tuple, list or int, but got {type(padding)}.")
4593
- dilation = _manipulate_dilation(dilation)
5355
+ raise TypeError(f"For 'conv1d', padding must be a tuple, list or int, but got {type(padding)}.")
5356
+ input_shape = input.shape
5357
+ in_channel = input_shape[1]
5358
+ if not (in_channel % groups == 0 and out_channel % groups == 0):
5359
+ raise ValueError(f"The argument 'groups' should be divisible by 'in_channel' " \
5360
+ f"and 'out_channel', but got group:{groups}, in_channel:{in_channel}, " \
5361
+ f"out_channel:{out_channel}.")
5362
+ dilation = _dim_manipulation(dilation, name='dilation')
5363
+ stride = _dim_manipulation(stride, name='stride')
4594
5364
  conv = _get_cache_prim(P.Conv2D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCHW")
4595
5365
  conv_res = conv(expanded_input, expanded_weight)
4596
5366
  squeezed_conv_res = sqz(conv_res)
@@ -4598,46 +5368,53 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4598
5368
  return squeezed_conv_res
4599
5369
  if not isinstance(bias, Tensor):
4600
5370
  raise TypeError(f"For 'conv1d', the 'bias' must be a Tensor, but got {type(bias)}.")
5371
+ if bias.shape[0] != out_channel:
5372
+ raise ValueError(f"For 'conv1d', given weight of size {weight_shape}, expected bias to be 1-dimensional with " \
5373
+ f"{out_channel} elements, but got bias of size {bias.shape[0]} instead.")
4601
5374
  output = bias_add(squeezed_conv_res, bias)
4602
5375
  return output
4603
5376
 
4604
5377
 
4605
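The implementation above realizes conv1d by adding a dummy height axis (ExpandDims at dim 2), running the 2-D Conv2D primitive, and squeezing the axis back out. A hedged shape walk-through using the shapes from the docstring example:

import numpy as np
import mindspore
from mindspore import Tensor, ops

x = Tensor(np.arange(64).reshape((4, 4, 4)), mindspore.float32)  # (N=4, C_in=4, L=4)
w = Tensor(np.arange(8).reshape((2, 2, 2)), mindspore.float32)   # (C_out=2, C_in/groups=2, k=2)
out = ops.conv1d(x, w, pad_mode='pad', padding=(1,), groups=2)
print(out.shape)  # (4, 2, 5): L_out = L + 2*padding - k + 1 = 4 + 2 - 2 + 1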
5378
  def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
4606
5379
  r"""
4607
- Applies a 2D convolution over an input tensor.
4608
- The input tensor is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
4609
- where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is height, :math:`W` is width, :math:`X_i` is
4610
- the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
4611
- For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
5380
+ Applies a 2D convolution over an input tensor. The input tensor is typically of
5381
+ shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
5382
+ channel number, :math:`H` is feature height, :math:`W` is feature width.
5383
+
5384
+ The output is calculated based on formula:
4612
5385
 
4613
5386
  .. math::
4614
5387
 
4615
- out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
5388
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5389
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5390
+
5391
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5392
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5393
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5394
+
5395
+ Here are the indices' meanings:
5396
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
4616
5397
 
4617
- where :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ operator,
4618
- :math:`C_{in}` is the input channel number, :math:`j` ranges
4619
- from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
4620
- filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
4621
- of kernel, and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, where :math:`\text{
4622
- kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution kernel.
4623
- The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
4624
- where `groups` is the group number to split the input in the channel dimension.
5398
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5399
+ output channels, which is also equal to the number of kernels.
4625
5400
 
4626
- If the `pad_mode` is set to be "valid", the output height and width will be :math:`\left \lfloor{
4627
- 1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
4628
- (\text{kernel_size[0]} - 1) \times(\text{dilation[0]} - 1)} {\text { stride[0] }}} \right \rfloor` and
5401
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5402
+ input channels, which is also equal to the number of channels in the convolutional kernels.
4629
5403
 
4630
- :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
4631
- (\text{kernel_size[1]} - 1) \times(\text{dilation[1]} - 1)} {\text { stride[1] }}} \right \rfloor` respectively.
5404
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5405
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5406
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5407
+ channel in the :math:`i`-th batch of the input feature map.
4632
5408
 
4633
- where :math:`dilation` is spacing between kernel elements, :math:`stride` is The step length of each step,
4634
- :math:`padding` is zero-padding added to both sides of the input.
4635
- For output height and width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv2d
4636
- <https://www.mindspore.cn/docs/en/r2.0/api_python/nn/mindspore.nn.Conv2d.html>`_.
5409
+ The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
5410
+ where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
5411
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5412
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
5413
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
4637
5414
 
4638
- The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
4639
- <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_. More detailed introduction can be found here:
4640
- `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
5415
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5416
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
5417
+ `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.
4641
5418
 
4642
5419
  Note:
4643
5420
  On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
@@ -4648,12 +5425,13 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4648
5425
  weight (Tensor): Tensor of shape
4649
5426
  :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
4650
5427
  is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
4651
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`. When bias is None, zeros will be used. Default: None.
5428
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5429
+ When bias is ``None`` , zeros will be used. Default: ``None`` .
4652
5430
  stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
4653
5431
  the height and width of movement are both strides, or a tuple of two int numbers that
4654
- represent height and width of movement respectively. Default: 1.
5432
+ represent height and width of movement respectively. Default: ``1`` .
4655
5433
  pad_mode (str, optional): Specifies padding mode. The optional values are
4656
- "same", "valid" and "pad". Default: "valid".
5434
+ ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
4657
5435
 
4658
5436
  - same: Adopts the way of completion. The height and width of the output will be equal to
4659
5437
  the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
@@ -4665,18 +5443,21 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4665
5443
 
4666
5444
  - pad: Implicit paddings on both sides of the input `x`. The number of `padding` will be padded to the input
4667
5445
  Tensor borders. `padding` must be greater than or equal to 0.
4668
- padding (Union(int, tuple[int]), optional): Implicit paddings on both sides of the input `x`.
5446
+ padding (Union(int, tuple[int], list[int]), optional): Implicit paddings on both sides of the input `x`.
4669
5447
  If `padding` is one integer, the paddings of top, bottom, left and right are the same, equal to padding.
4670
- If `padding` is a tuple with two integers, the padding of top adn bottom is padding[0], and the padding of
4671
- left and right is padding[1]. Default: 0.
5448
+ If `padding` is a tuple/list with 2 integers, the padding of top and bottom is padding[0],
5449
+ and the padding of left and right is padding[1]. Default: ``0`` .
4672
5450
  dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of
4673
5451
  2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
4674
5452
  there will be :math:`k - 1` pixels skipped for each sampling location. Its value must
4675
- be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: 1.
4676
- groups (int, optional): Splits `input` into groups. Default: 1.
5453
+ be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` .
5454
+ groups (int, optional): Splits `input` into groups. Default: ``1`` .
4677
5455
 
4678
5456
  Returns:
4679
5457
  Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
5458
+ To see how different pad modes affect the output shape, please refer to
5459
+ :class:`mindspore.nn.Conv2d` for more details.
5460
+
4680
5461
 
4681
5462
  Raises:
4682
5463
  TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -4685,29 +5466,45 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
4685
5466
  ValueError: If the shape of `bias` is not :math:`C_{out}` .
4686
5467
  ValueError: If `stride` or `dilation` is less than 1.
4687
5468
  ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
4688
- ValueError: If `padding` is a tuple whose length is not equal to 2.
5469
+ ValueError: If `padding` is a tuple/list whose length is not equal to 2.
4689
5470
  ValueError: If `pad_mode` is not equal to 'pad' and `padding` is greater than 0.
4690
5471
 
4691
5472
  Supported Platforms:
4692
- ``Ascend`` ``GPU`` ``CPU``
5473
+ ``Ascend`` ``GPU``
4693
5474
 
4694
5475
  Examples:
5476
+ >>> import mindspore
5477
+ >>> import numpy as np
5478
+ >>> from mindspore import Tensor, ops
4695
5479
  >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
4696
5480
  >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
4697
5481
  >>> output = ops.conv2d(x, weight)
4698
5482
  >>> print(output.shape)
4699
5483
  (10, 32, 30, 30)
4700
5484
  """
5485
+ if isinstance(stride, (tuple, list)):
5486
+ _check_conv_iterable_lengths(stride, dim=2, iter_name='stride')
5487
+ if isinstance(dilation, (tuple, list)):
5488
+ _check_conv_iterable_lengths(dilation, dim=2, iter_name='dilation')
4701
5489
  if isinstance(padding, (tuple, list)):
4702
5490
  padding = _manipulate_padding(padding, dim=2)
4703
5491
  weight_shape = weight.shape
4704
5492
  out_channel = weight_shape[0]
4705
5493
  kernel_size = weight_shape[2:4]
5494
+ input_shape = input.shape
5495
+ in_channel = input_shape[1]
5496
+ if not (in_channel % groups == 0 and out_channel % groups == 0):
5497
+ raise ValueError(f"The argument 'groups' should be divisible by 'in_channel' " \
5498
+ f"and 'out_channel', but got group:{groups}, in_channel:{in_channel}, " \
5499
+ f"out_channel:{out_channel}.")
4706
5500
  conv = _get_cache_prim(P.Conv2D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCHW")
4707
5501
  if bias is None:
4708
5502
  return conv(input, weight)
4709
5503
  if not isinstance(bias, Tensor):
4710
5504
  raise TypeError(f"For 'conv2d', the 'bias' must be a Tensor, but got {type(bias)}.")
5505
+ if bias.shape[0] != out_channel:
5506
+ raise ValueError(f"For 'conv2d', Given weight of size {weight_shape}, expected bias to be 1-dimensional with " \
5507
+ f"{out_channel} elements, but got bias of size {bias.shape[0]} instead.")
4711
5508
  conv_result = conv(input, weight)
4712
5509
  output = bias_add(conv_result, bias)
4713
5510
  return output
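
To connect the code path above with the cross-correlation formula in the docstring, here is a minimal NumPy sketch of a stride-1, groups=1, "valid"-padding 2D convolution. It is illustrative only; `conv2d_valid` is not a mindspore API, just a naive reference of the same arithmetic.

import numpy as np

def conv2d_valid(x, weight, bias=None):
    """Naive NCHW cross-correlation: stride 1, no padding, groups=1."""
    n, c_in, h_in, w_in = x.shape
    c_out, _, kh, kw = weight.shape
    h_out, w_out = h_in - kh + 1, w_in - kw + 1
    out = np.zeros((n, c_out, h_out, w_out), dtype=x.dtype)
    for i in range(n):              # batch index i in the formula
        for j in range(c_out):      # output channel index j
            for y in range(h_out):
                for z in range(w_out):
                    # sum over input channels k and the kernel window (cross-correlation)
                    out[i, j, y, z] = np.sum(x[i, :, y:y + kh, z:z + kw] * weight[j])
            if bias is not None:
                out[i, j] += bias[j]
    return out

x = np.ones((1, 2, 5, 5), dtype=np.float32)
weight = np.ones((3, 2, 3, 3), dtype=np.float32)
print(conv2d_valid(x, weight).shape)        # (1, 3, 3, 3)
print(conv2d_valid(x, weight)[0, 0, 0, 0])  # 18.0 = 2 input channels * 3 * 3 ones
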
@@ -4723,24 +5520,27 @@ def hardsigmoid(input):
4723
5520
 
4724
5521
  .. math::
4725
5522
 
4726
- \text{hsigmoid}(x_{i}) = max(0, min(1, \frac{x_{i} + 3}{6}))
5523
+ \text{hsigmoid}(x_{i}) = \max(0, \min(1, \frac{x_{i} + 3}{6}))
4727
5524
 
4728
5525
  where :math:`x_i` is an element of the input Tensor.
4729
5526
 
4730
5527
  Args:
4731
- input (Tensor): Hard Sigmoid input, with float16, float32 or float64 data type.
5528
+ input (Tensor): The input Tensor.
4732
5529
 
4733
5530
  Returns:
4734
5531
  A Tensor whose dtype and shape are the same as `input`.
4735
5532
 
4736
5533
  Raises:
4737
5534
  TypeError: If `input` is not a Tensor.
4738
- TypeError: If dtype of `input` is not float16, float32 or float64.
5535
+ TypeError: If dtype of `input` is not int or float.
4739
5536
 
4740
5537
  Supported Platforms:
4741
5538
  ``Ascend`` ``GPU`` ``CPU``
4742
5539
 
4743
5540
  Examples:
5541
+ >>> import mindspore
5542
+ >>> import numpy as np
5543
+ >>> from mindspore import Tensor, ops
4744
5544
  >>> x = Tensor(np.array([ -3.5, 0, 4.3]), mindspore.float32)
4745
5545
  >>> output = ops.hardsigmoid(x)
4746
5546
  >>> print(output)
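
The hard sigmoid formula above can be checked with plain NumPy; this is an illustrative sketch only, and `hardsigmoid_ref` is not a mindspore API.

import numpy as np

def hardsigmoid_ref(x):
    # max(0, min(1, (x + 3) / 6)), applied element-wise
    return np.clip((x + 3.0) / 6.0, 0.0, 1.0)

print(hardsigmoid_ref(np.array([-3.5, 0.0, 4.3], dtype=np.float32)))
# [0.  0.5 1. ]
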
@@ -4765,8 +5565,8 @@ def hardtanh(input, min_val=-1.0, max_val=1.0):
4765
5565
 
4766
5566
  Args:
4767
5567
  input (Tensor): Input Tensor.
4768
- min_val (Union[int, float]): Minimum value of the linear region range. Default: -1.0.
4769
- max_val (Union[int, float]): Maximum value of the linear region range. Default: 1.0.
5568
+ min_val (Union[int, float]): Minimum value of the linear region range. Default: ``-1.0`` .
5569
+ max_val (Union[int, float]): Maximum value of the linear region range. Default: ``1.0`` .
4770
5570
 
4771
5571
  Returns:
4772
5572
  Tensor, with the same dtype and shape as `input`.
@@ -4780,6 +5580,8 @@ def hardtanh(input, min_val=-1.0, max_val=1.0):
4780
5580
  ``Ascend`` ``GPU`` ``CPU``
4781
5581
 
4782
5582
  Examples:
5583
+ >>> import mindspore
5584
+ >>> from mindspore import Tensor, ops
4783
5585
  >>> x = Tensor([-1, -2, 0, 2, 1], mindspore.float16)
4784
5586
  >>> output = ops.hardtanh(x, min_val=-1.0, max_val=1.0)
4785
5587
  >>> print(output)
@@ -4788,9 +5590,10 @@ def hardtanh(input, min_val=-1.0, max_val=1.0):
4788
5590
  _check_is_tensor('input', input, "hardtanh")
4789
5591
  _check_value_type("min_val", min_val, [int, float], "hardtanh")
4790
5592
  _check_value_type("max_val", max_val, [int, float], "hardtanh")
5593
+ input_dtype = input.dtype
4791
5594
  input = _get_cache_prim(P.Maximum)()(input, min_val)
4792
5595
  input = _get_cache_prim(P.Minimum)()(input, max_val)
4793
- return input
5596
+ return input.astype(input_dtype)
4794
5597
 
4795
5598
 
4796
5599
  def huber_loss(input, target, reduction='mean', delta=1.0):
@@ -4798,8 +5601,9 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
4798
5601
  Calculates the error between the predicted value and the target value,
4799
5602
  which has the best of both the loss of l1 and the loss of mse.
4800
5603
 
4801
- Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the reduction parameter is set to "none"
4802
- then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction. The formula is as follows:
5604
+ Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the `reduction` parameter
5605
+ is set to ``"none"`` then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
5606
+ The formula is as follows:
4803
5607
 
4804
5608
  .. math::
4805
5609
  \ell(x, y) = L = \{l_1,\dots,l_N\}^\top
@@ -4828,26 +5632,33 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
4828
5632
  target (Tensor): Target value, has same dtype and shape as the `input` in common cases.
4829
5633
  However, when the shape of `target` is different from the shape of `input`,
4830
5634
  they should be broadcastable to each other.
4831
- reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum" and "none".
4832
- Default: "mean".
5635
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5636
+ ``'sum'`` . Default: ``'mean'`` .
5637
+
5638
+ - ``'none'``: no reduction will be applied.
5639
+ - ``'mean'``: compute and return the mean of elements in the output.
5640
+ - ``'sum'``: the output elements will be summed.
5641
+
4833
5642
  delta (Union[int, float]): The threshold to change between two types of loss.
4834
- The value must be greater than zero. Default: 1.0.
5643
+ The value must be greater than zero. Default: ``1.0`` .
4835
5644
 
4836
5645
  Returns:
4837
- Tensor or Scalar, if `reduction` is "none", return a Tensor with same shape and dtype as `input`.
5646
+ Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
4838
5647
  Otherwise, a scalar value will be returned.
4839
5648
 
4840
5649
  Raises:
4841
5650
  TypeError: If `input` or `target` is not a Tensor.
4842
5651
  TypeError: If dtype of `delta` is neither float nor int.
4843
5652
  ValueError: If `delta` is less than or equal to 0.
4844
- ValueError: If `reduction` is not one of "none", "mean", "sum".
5653
+ ValueError: If `reduction` is not one of ``"none"``, ``"mean"``, ``"sum"``.
4845
5654
  ValueError: If `input` and `target` have different shapes and cannot be broadcasted to each other.
4846
5655
 
4847
5656
  Supported Platforms:
4848
5657
  ``Ascend`` ``GPU`` ``CPU``
4849
5658
 
4850
5659
  Examples:
5660
+ >>> import mindspore
5661
+ >>> from mindspore import Tensor, ops
4851
5662
  >>> x = Tensor([1, 2, 10, 2], mindspore.float32)
4852
5663
  >>> target = Tensor([1, 5, 1, 20], mindspore.float32)
4853
5664
  >>> output = ops.huber_loss(x, target, reduction="mean", delta=2)
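
For reference, the per-element Huber loss and the three reduction modes can be reproduced in a few NumPy lines. This is a sketch of the standard Huber definition with threshold `delta` (quadratic below the threshold, linear above it), not mindspore code.

import numpy as np

def huber_loss_ref(x, y, delta=1.0, reduction="mean"):
    err = np.abs(x - y)
    # quadratic branch below delta, linear branch above it
    loss = np.where(err < delta, 0.5 * err ** 2, delta * (err - 0.5 * delta))
    if reduction == "mean":
        return loss.mean()
    if reduction == "sum":
        return loss.sum()
    return loss  # 'none'

x = np.array([1.0, 2.0, 10.0, 2.0], dtype=np.float32)
target = np.array([1.0, 5.0, 1.0, 20.0], dtype=np.float32)
print(huber_loss_ref(x, target, delta=2.0, reduction="mean"))  # 13.5
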
@@ -4907,6 +5718,9 @@ def adaptive_avg_pool1d(input, output_size):
4907
5718
  ``Ascend`` ``GPU`` ``CPU``
4908
5719
 
4909
5720
  Examples:
5721
+ >>> import mindspore
5722
+ >>> import numpy as np
5723
+ >>> from mindspore import Tensor, ops
4910
5724
  >>> input = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
4911
5725
  >>> output = ops.adaptive_avg_pool1d(input, output_size=2)
4912
5726
  >>> print(output.shape)
@@ -4968,7 +5782,7 @@ def batch_norm(input_x, running_mean, running_var, weight, bias, training=False,
4968
5782
  y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
4969
5783
 
4970
5784
  where :math:`\gamma` is `weight`, :math:`\beta` is `bias`, :math:`\epsilon` is `eps`, :math:`mean` is the
4971
- mean of `x`, :math:`variance` is the variance of `x`.
5785
+ mean of :math:`x`, :math:`variance` is the variance of :math:`x`.
4972
5786
 
4973
5787
  .. warning::
4974
5788
  - For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
@@ -4984,11 +5798,11 @@ def batch_norm(input_x, running_mean, running_var, weight, bias, training=False,
4984
5798
  weight (Union[Tensor, Parameter]): The shape :math:`(C,)`, with float16 or float32 data type.
4985
5799
  bias (Union[Tensor, Parameter]): The shape :math:`(C,)`, has the same data type with `weight`.
4986
5800
  training (bool, optional): If `training` is `True`, `mean` and `variance` are computed during training.
4987
- If `training` is `False`, they're loaded from checkpoint during inference. Default: False.
5801
+ If `training` is `False`, they're loaded from checkpoint during inference. Default: ``False`` .
4988
5802
  momentum (float, optional): The hyper parameter to compute moving average for `running_mean` and `running_var`
4989
5803
  (e.g. :math:`new\_running\_mean = (1 - momentum) * running\_mean + momentum * current\_mean`).
4990
- Momentum value must be `[0, 1]`. Default: 0.1.
4991
- eps (float, optional): A small value added for numerical stability. Default: 1e-5.
5804
+ Momentum value must be `[0, 1]`. Default: ``0.1`` .
5805
+ eps (float, optional): A small value added for numerical stability. The value must be within `(0, 1]` . Default: ``1e-5`` .
4992
5806
 
4993
5807
  Returns:
4994
5808
  output_x (Tensor) - The same type and shape as the `input_x`. The shape is :math:`(N, C)`.
@@ -5003,11 +5817,13 @@ def batch_norm(input_x, running_mean, running_var, weight, bias, training=False,
5003
5817
  ``Ascend`` ``GPU`` ``CPU``
5004
5818
 
5005
5819
  Examples:
5006
- >>> input_x = Tensor([[1.0, 2.0], [3.0, 4.0]], dtype.float32)
5007
- >>> running_mean = Tensor([0.5, 1.5], dtype.float32)
5008
- >>> running_var = Tensor([0.1, 0.2], dtype.float32)
5009
- >>> weight = Tensor([2.0, 2.0], dtype.float32)
5010
- >>> bias = Tensor([-1.0, -1.0], dtype.float32)
5820
+ >>> import mindspore
5821
+ >>> from mindspore import Tensor, ops
5822
+ >>> input_x = Tensor([[1.0, 2.0], [3.0, 4.0]], mindspore.float32)
5823
+ >>> running_mean = Tensor([0.5, 1.5], mindspore.float32)
5824
+ >>> running_var = Tensor([0.1, 0.2], mindspore.float32)
5825
+ >>> weight = Tensor([2.0, 2.0], mindspore.float32)
5826
+ >>> bias = Tensor([-1.0, -1.0], mindspore.float32)
5011
5827
  >>> output = ops.batch_norm(input_x, running_mean, running_var, weight, bias)
5012
5828
  >>> print(output)
5013
5829
  [[ 2.1621194 1.2360122]
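
In inference mode (training=False, so the running statistics are used) the normalization above reduces to simple broadcasting. A NumPy sketch for the 2-D case follows; `batch_norm_infer` is only an illustration, not part of mindspore.

import numpy as np

def batch_norm_infer(x, running_mean, running_var, weight, bias, eps=1e-5):
    # y = (x - mean) / sqrt(var + eps) * gamma + beta, applied per channel
    return (x - running_mean) / np.sqrt(running_var + eps) * weight + bias

x = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
out = batch_norm_infer(x,
                       np.array([0.5, 1.5], dtype=np.float32),
                       np.array([0.1, 0.2], dtype=np.float32),
                       np.array([2.0, 2.0], dtype=np.float32),
                       np.array([-1.0, -1.0], dtype=np.float32))
print(out[0])  # [2.1621... 1.2360...], matching the first row printed above
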
@@ -5024,21 +5840,29 @@ def bias_add(input_x, bias):
5024
5840
  consistent with the shape of the `input_x` Tensor.
5025
5841
 
5026
5842
  Args:
5027
- input_x (Tensor): The input tensor. The shape can be 2-5 dimensions.
5028
- bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of `input_x`.
5843
+ input_x (Tensor): The input tensor. The shape can be 2-5 dimensions. Supported dtypes:
5844
+
5845
+ - Ascend/CPU: all Number type.
5846
+ - GPU: float16, float32, int8.
5847
+
5848
+ bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
5849
+ `input_x`. It has the same type as `input_x`.
5029
5850
 
5030
5851
  Returns:
5031
5852
  Tensor, with the same shape and data type as `input_x`.
5032
5853
 
5033
5854
  Raises:
5034
5855
  TypeError: If `input_x` or `bias` is not a Tensor.
5035
- TypeError: If dtype of `input_x` or `bias` is inconsistent.
5856
+ TypeError: If dtype of `input_x` and `bias` is inconsistent.
5036
5857
  TypeError: If dimension of `input_x` is not in the range [2, 5].
5037
5858
 
5038
5859
  Supported Platforms:
5039
5860
  ``Ascend`` ``GPU`` ``CPU``
5040
5861
 
5041
5862
  Examples:
5863
+ >>> import mindspore
5864
+ >>> import numpy as np
5865
+ >>> from mindspore import Tensor, ops
5042
5866
  >>> input_x = Tensor(np.arange(6).reshape((2, 3)), mindspore.float32)
5043
5867
  >>> bias = Tensor(np.random.random(3).reshape((3)), mindspore.float32)
5044
5868
  >>> output = ops.bias_add(input_x, bias)
@@ -5080,13 +5904,16 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5080
5904
  labels (Tensor): The target value which has the same shape and data type as `logits`.
5081
5905
  weight (Tensor, optional): A rescaling weight applied to the loss of each batch element.
5082
5906
  Its shape must be able to broadcast to that of `logits` and `labels`.
5083
- And it must have the same shape and data type as `logits`. Default: None. If set to None, the loss function
5907
+ And it must have the same shape and data type as `logits`. Default: ``None`` . If set to ``None`` ,
5908
+ the loss function
5084
5909
  will not consider any sample weights, and each sample will be treated as having equal importance
5085
5910
  when calculating the loss.
5086
- reduction (str, optional): Specify the protocol calculation method used to output the results.
5087
- Its value must be one of 'none', 'mean' or 'sum', respectively indicate that no calculation method is
5088
- specified, using the average value for calculation, and using summation for calculation, not case-sensitive.
5089
- Default: 'mean'.
5911
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5912
+ ``'sum'`` . Default: ``'mean'`` .
5913
+
5914
+ - ``'none'``: no reduction will be applied.
5915
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
5916
+ - ``'sum'``: the output elements will be summed.
5090
5917
 
5091
5918
  Returns:
5092
5919
  Tensor or Scalar. Returns Tensor that has the same dtype and shape as `logits` if `reduction` is 'none'.
@@ -5095,13 +5922,16 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5095
5922
  Raises:
5096
5923
  TypeError: If `logits`, `labels` or `weight` is not a Tensor.
5097
5924
  TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
5098
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
5925
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
5099
5926
  ValueError: If shape of `labels` is not the same as `logits` or `weight` (if given).
5100
5927
 
5101
5928
  Supported Platforms:
5102
5929
  ``Ascend`` ``GPU`` ``CPU``
5103
5930
 
5104
5931
  Examples:
5932
+ >>> import mindspore
5933
+ >>> import numpy as np
5934
+ >>> from mindspore import Tensor, ops
5105
5935
  >>> logits = Tensor(np.array([0.2, 0.7, 0.1]), mindspore.float32)
5106
5936
  >>> labels = Tensor(np.array([0., 1., 0.]), mindspore.float32)
5107
5937
  >>> weight = Tensor(np.array([1, 2, 2]), mindspore.float32)
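
Treating the `logits` values in this example as probabilities (as the function does), the weighted binary cross-entropy can be written directly in NumPy. This is a reference sketch of the standard definition; `bce_ref` is not a mindspore API.

import numpy as np

def bce_ref(p, y, weight=None, reduction="mean"):
    # -w * (y * log(p) + (1 - y) * log(1 - p)), then the chosen reduction
    loss = -(y * np.log(p) + (1.0 - y) * np.log(1.0 - p))
    if weight is not None:
        loss = loss * weight
    if reduction == "mean":
        return loss.mean()
    if reduction == "sum":
        return loss.sum()
    return loss

p = np.array([0.2, 0.7, 0.1], dtype=np.float32)
y = np.array([0.0, 1.0, 0.0], dtype=np.float32)
w = np.array([1.0, 2.0, 2.0], dtype=np.float32)
print(bce_ref(p, y, w))  # about 0.38, the weighted mean over the three elements
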
@@ -5115,35 +5945,49 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
5115
5945
 
5116
5946
  def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
5117
5947
  r"""
5118
- Applies a 3D convolution over an input tensor. The input tensor is typically of shape
5119
- :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
5120
- :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, where :math:`N` is batch size, :math:`C` is channel number,
5121
- :math:`D` is depth, :math:`H, W` is feature height and width respectively.
5122
- the output value of a layer is calculated as:
5948
+ Applies a 3D convolution over an input tensor. The input tensor is typically of
5949
+ shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C`
5950
+ is channel number, :math:`D` is feature depth, :math:`H` is feature height, :math:`W` is feature width.
5951
+
5952
+ The output is calculated based on the following formula:
5123
5953
 
5124
5954
  .. math::
5125
- \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
5126
- \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
5127
- \operatorname{input}\left(N_{i}, k\right))
5128
-
5129
- where :math:`k` is kernel,
5130
- :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ ,
5131
- :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the jth channel of
5132
- the output and :math:`j` is in the range of :math:`[0, C_{out}-1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
5133
- is a convolution kernel slice with shape
5134
- :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
5135
- where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are
5136
- the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter
5137
- and :math:`\text{X}` is the input tensor.
5138
- The shape of full convolution kernel is
5139
- :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
5140
- where `groups` is the number of groups to split `input` in the channel dimension.
5141
-
5142
- For more details, please refer to the paper `Gradient Based Learning Applied to Document
5143
- Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ .
5955
+
5956
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
5957
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
5958
+
5959
+ where :math:`bias` is the output channel bias, :math:`ccor` is
5960
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
5961
+ , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
5962
+
5963
+ Here are the indices' meanings:
5964
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
5965
+
5966
+ - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
5967
+ output channels, which is also equal to the number of kernels.
5968
+
5969
+ - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
5970
+ input channels, which is also equal to the number of channels in the convolutional kernels.
5971
+
5972
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
5973
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
5974
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
5975
+ channel in the :math:`i`-th batch of the input feature map.
5976
+
5977
+ The shape of the convolutional kernel is given by
5978
+ :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
5979
+ where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
5980
+ height and width of the kernel, respectively.
5981
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
5982
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
5983
+ \text{kernel_size[1]}, \text{kernel_size[2]})`,
5984
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
5985
+
5986
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
5987
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
5144
5988
 
5145
5989
  Note:
5146
- 1. On Ascend platform, :math:`groups = 1` must be satisfied.
5990
+ 1. On Ascend platform, `groups = 1` must be satisfied.
5147
5991
  2. On Ascend dilation on depth only supports the case of 1.
5148
5992
 
5149
5993
  Args:
@@ -5151,45 +5995,45 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5151
5995
  weight (Tensor): Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]},
5152
5996
  \text{kernel_size[2]})`, then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]},
5153
5997
  \text{kernel_size[1]}, \text{kernel_size[2]})`.
5154
- bias (Tensor): Bias Tensor with shape :math:`(C_{out})`. When bias is None, zeros will be used. Default:
5155
- None.
5998
+ bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
5999
+ When bias is None, zeros will be used. Default: ``None`` .
5156
6000
  stride (Union[int, tuple[int]], optional): The distance of kernel moving,
5157
6001
  it can be an int number that represents
5158
6002
  the depth, height and width of movement or a tuple of three int numbers that
5159
- represent depth, height and width movement respectively. Default: 1.
6003
+ represent depth, height and width movement respectively. Default: ``1`` .
5160
6004
  pad_mode (str, optional): Specifies padding mode. The optional values are
5161
- "same", "valid" and "pad". Default: "valid".
6005
+ ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
5162
6006
 
5163
- - same: Adopts the way of completion. The depth, height and width of the output will be equal to
6007
+ - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be equal to
5164
6008
  the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
5165
6009
  left and right directions possibly.
5166
6010
  Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
5167
6011
  If this mode is set, `pad` must be 0.
5168
6012
 
5169
- - valid: Adopts the way of discarding. The possible largest depth, height and width of output
6013
+ - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
5170
6014
  will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
5171
6015
  must be 0.
5172
6016
 
5173
- - pad: Implicit paddings on both sides of the input in depth, height and width. The number of `pad` will
5174
- be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
6017
+ - ``"pad"``: Implicit paddings on both sides of the input in depth, height and width.
6018
+ The number of `pad` will be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
5175
6019
 
5176
- padding (Union[int, tuple[int]], optional): The pad value to be filled. If `pad` is an integer,
6020
+ padding (Union[int, tuple[int], list[int]], optional): The pad value to be filled. If `pad` is an integer,
5177
6021
  the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
5178
- If `pad` is a tuple of 3 integers, the padding of head, tail, top, bottom,
5179
- left and right equal to pad[0], pad[0], pad[1], pad[1], pad[2] and pad[2] correspondingly. Default: 0.
6022
+ If `pad` is a tuple/list of 3 integers, the padding of head, tail, top, bottom,
6023
+ left and right equal to pad[0], pad[0], pad[1], pad[1], pad[2] and pad[2] correspondingly. Default: ``0`` .
5180
6024
  dilation (Union[int, tuple[int]], optional): The data type is int or a tuple of 3 integers
5181
6025
  :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1
5182
6026
  on Ascend backend. Specifies the dilation rate to use for dilated convolution. If set :math:`k > 1`,
5183
6027
  there will be :math:`k - 1` pixels skipped for each sampling location.
5184
6028
  The value ranges for the depth, height, and width dimensions are [1, D], [1, H], and [1, W],
5185
- respectively. Default: 1.
6029
+ respectively. Default: ``1`` .
5186
6030
  groups (int, optional):The number of groups into which the filter is divided. `in_channels`
5187
- and `out_channels` must be divisible by `group`. Default: 1.
6031
+ and `out_channels` must be divisible by `groups`. Default: ``1`` .
5188
6032
 
5189
6033
  Returns:
5190
6034
  Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
5191
6035
 
5192
- `pad_mode` is 'same':
6036
+ `pad_mode` is ``"same"``:
5193
6037
 
5194
6038
  .. math::
5195
6039
  \begin{array}{ll} \\
@@ -5198,7 +6042,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5198
6042
  W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
5199
6043
  \end{array}
5200
6044
 
5201
- `pad_mode` is 'valid':
6045
+ `pad_mode` is ``"valid"``:
5202
6046
 
5203
6047
  .. math::
5204
6048
  \begin{array}{ll} \\
@@ -5210,7 +6054,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5210
6054
  {\text{stride[2]}} + 1} \right \rfloor \\
5211
6055
  \end{array}
5212
6056
 
5213
- `pad_mode` is 'pad':
6057
+ `pad_mode` is ``"pad"``:
5214
6058
 
5215
6059
  .. math::
5216
6060
  \begin{array}{ll} \\
@@ -5229,13 +6073,16 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5229
6073
  ValueError: If the shape of `bias` is not :math:`C_{out}`.
5230
6074
  ValueError: If `stride` or `dilation` is less than 1.
5231
6075
  ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
5232
- ValueError: If `padding` is a tuple whose length is not equal to 4.
6076
+ ValueError: If `padding` is a tuple or list whose length is not equal to 3.
5233
6077
  ValueError: If `pad_mode` is not equal to 'pad' and `pad` is greater than 0.
5234
6078
 
5235
6079
  Supported Platforms:
5236
- ``Ascend`` ``GPU`` ``CPU``
6080
+ ``Ascend`` ``GPU``
5237
6081
 
5238
6082
  Examples:
6083
+ >>> import mindspore
6084
+ >>> import numpy as np
6085
+ >>> from mindspore import Tensor, ops
5239
6086
  >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
5240
6087
  >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
5241
6088
  >>> output = ops.conv3d(x, weight, pad_mode="same", padding=0, stride=1, dilation=1, groups=1)
@@ -5251,6 +6098,15 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5251
6098
  weight_shape = weight.shape
5252
6099
  out_channel = weight_shape[0]
5253
6100
  kernel_size = weight_shape[2:5]
6101
+ if isinstance(stride, (tuple, list)):
6102
+ _check_conv_iterable_lengths(stride, dim=3, iter_name='stride')
6103
+ if isinstance(dilation, (tuple, list)):
6104
+ _check_conv_iterable_lengths(dilation, dim=3, iter_name='dilation')
6105
+ input_shape = input.shape
6106
+ in_channel = input_shape[1]
6107
+ if not (in_channel % groups == 0 and out_channel % groups == 0):
6108
+ raise ValueError("The argument 'groups' should be divisible by 'in_channel' " \
6109
+ "and 'out_channel'")
5254
6110
  if isinstance(padding, (list, tuple)):
5255
6111
  padding = _manipulate_padding(padding, dim=3)
5256
6112
  conv = _get_cache_prim(P.Conv3D)(out_channel, kernel_size, 1, pad_mode, padding, stride, dilation, groups, "NCDHW")
@@ -5263,7 +6119,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
5263
6119
  return output
5264
6120
 
5265
6121
 
5266
- @constexpr
6122
+ @_primexpr
5267
6123
  def _check_positive_int(arg_value, arg_name=None, prim_name=None):
5268
6124
  validator.check_positive_int(arg_value, arg_name=arg_name, prim_name=prim_name)
5269
6125
 
@@ -5275,6 +6131,12 @@ def _check_pxiel_shuffle_valid(num, factor):
5275
6131
  "by `upscale_factor` squared.")
5276
6132
 
5277
6133
 
6134
+ def _check_pixel_shuffle_unshuffle_input_shape(input, cls_name):
6135
+ """Internal function, used to check whether the shape of pixel shuffle or unshuffle input meets the requirements."""
6136
+ if input.ndim < 3:
6137
+ raise ValueError(f"For {cls_name}, the dimension of `input` should be larger than 2, but got {input.ndim}.")
6138
+
6139
+
5278
6140
  def pixel_shuffle(input, upscale_factor):
5279
6141
  r"""
5280
6142
  Applies the PixelShuffle operation over input `input` which implements sub-pixel convolutions
@@ -5297,12 +6159,16 @@ def pixel_shuffle(input, upscale_factor):
5297
6159
  Raises:
5298
6160
  ValueError: If `upscale_factor` is not a positive integer.
5299
6161
  ValueError: If the length of third to last dimension is not divisible by `upscale_factor` squared.
5300
- TypeError: If the dimension of `input` is less than 3.
6162
+ ValueError: If the dimension of `input` is less than 3.
6163
+ TypeError: If `input` is not a Tensor.
5301
6164
 
5302
6165
  Supported Platforms:
5303
6166
  ``Ascend`` ``GPU`` ``CPU``
5304
6167
 
5305
6168
  Examples:
6169
+ >>> import mindspore
6170
+ >>> import numpy as np
6171
+ >>> from mindspore import ops
5306
6172
  >>> input_x = np.arange(3 * 2 * 9 * 4 * 4).reshape((3, 2, 9, 4, 4))
5307
6173
  >>> input_x = mindspore.Tensor(input_x, mindspore.dtype.int32)
5308
6174
  >>> output = ops.pixel_shuffle(input_x, 3)
@@ -5310,10 +6176,10 @@ def pixel_shuffle(input, upscale_factor):
5310
6176
  (3, 2, 1, 12, 12)
5311
6177
  """
5312
6178
  _check_positive_int(upscale_factor, "upscale_factor")
6179
+ _check_is_tensor("input", input, "pixel_shuffle")
6180
+ _check_pixel_shuffle_unshuffle_input_shape(input, "pixel_shuffle")
5313
6181
  idx = P.Shape()(input)
5314
- length = len(idx)
5315
- if length < 3:
5316
- raise TypeError(f"For pixel_shuffle, the dimension of `input` should be larger than 2, but got {length}.")
6182
+ length = input.ndim
5317
6183
  pre = idx[:-3]
5318
6184
  c, h, w = idx[-3:]
5319
6185
  _check_pxiel_shuffle_valid(c, upscale_factor)
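
The rearrangement that follows these checks is the standard sub-pixel shuffle, i.e. a reshape, an axis permutation, and a final reshape. Below is a NumPy sketch of the equivalent transform (illustrative only; `pixel_shuffle_ref` is not a mindspore helper).

import numpy as np

def pixel_shuffle_ref(x, r):
    """(*, C*r*r, H, W) -> (*, C, H*r, W*r) via reshape + axis moves."""
    *pre, c, h, w = x.shape
    assert c % (r * r) == 0
    x = x.reshape(*pre, c // (r * r), r, r, h, w)
    # move the two r factors next to H and W, giving (..., C, H, r, W, r)
    x = np.moveaxis(x, (-4, -3), (-3, -1))
    return x.reshape(*pre, c // (r * r), h * r, w * r)

x = np.arange(3 * 2 * 9 * 4 * 4).reshape(3, 2, 9, 4, 4)
print(pixel_shuffle_ref(x, 3).shape)  # (3, 2, 1, 12, 12), matching the docstring example
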
@@ -5358,12 +6224,16 @@ def pixel_unshuffle(input, downscale_factor):
5358
6224
  Raises:
5359
6225
  ValueError: If `downscale_factor` is not a positive integer.
5360
6226
  ValueError: If the length of second to last dimension or last dimension is not divisible by `downscale_factor` .
5361
- TypeError: If the dimension of `input` is less than 3.
6227
+ ValueError: If the dimension of `input` is less than 3.
6228
+ TypeError: If `input` is not a Tensor.
5362
6229
 
5363
6230
  Supported Platforms:
5364
6231
  ``Ascend`` ``GPU`` ``CPU``
5365
6232
 
5366
6233
  Examples:
6234
+ >>> import mindspore
6235
+ >>> import numpy as np
6236
+ >>> from mindspore import Tensor, ops
5367
6237
  >>> input_x = np.arange(8 * 8).reshape((1, 1, 8, 8))
5368
6238
  >>> input_x = mindspore.Tensor(input_x, mindspore.dtype.int32)
5369
6239
  >>> output = ops.pixel_unshuffle(input_x, 2)
@@ -5371,10 +6241,10 @@ def pixel_unshuffle(input, downscale_factor):
5371
6241
  (1, 4, 4, 4)
5372
6242
  """
5373
6243
  _check_positive_int(downscale_factor, "downscale_factor")
6244
+ _check_is_tensor("input", input, "pixel_unshuffle")
6245
+ _check_pixel_shuffle_unshuffle_input_shape(input, "pixel_unshuffle")
5374
6246
  idx = P.Shape()(input)
5375
- length = len(idx)
5376
- if length < 3:
5377
- raise TypeError(f"For pixel_unshuffle, the dimension of `input` should be larger than 2, but got {length}.")
6247
+ length = input.ndim
5378
6248
  pre = idx[:-3]
5379
6249
  c, h, w = idx[-3:]
5380
6250
  _check_pxiel_unshuffle_valid(h, w, downscale_factor)
@@ -5407,7 +6277,7 @@ def glu(x, axis=-1):
5407
6277
  Args:
5408
6278
  x (Tensor): Tensor to be split. Its dtype is Number, and shape is :math:`(\ast_1, N, \ast_2)`
5409
6279
  where `*` means any number of additional dimensions.
5410
- axis (int, optional): the axis to split the input. It must be int. Default: -1, the last axis of `x`.
6280
+ axis (int, optional): the axis to split the input. It must be int. Default: ``-1`` , the last axis of `x`.
5411
6281
 
5412
6282
  Returns:
5413
6283
  Tensor, the same dtype as the `x`, with the shape :math:`(\ast_1, M, \ast_2)` where :math:`M=N/2`.
@@ -5417,9 +6287,10 @@ def glu(x, axis=-1):
5417
6287
  TypeError: If `x` is not a Tensor.
5418
6288
 
5419
6289
  Supported Platforms:
5420
- ``Ascend`` ``CPU``
6290
+ ``Ascend`` ``GPU`` ``CPU``
5421
6291
 
5422
6292
  Examples:
6293
+ >>> from mindspore import Tensor, ops
5423
6294
  >>> input = Tensor([[0.1,0.2,0.3,0.4],[0.5,0.6,0.7,0.8]])
5424
6295
  >>> output = ops.glu(input)
5425
6296
  >>> print(output)
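
The gated linear unit splits the input in half along `axis` and gates the first half with the sigmoid of the second. A NumPy sketch of that computation (illustrative only; `glu_ref` is not a mindspore API):

import numpy as np

def glu_ref(x, axis=-1):
    a, b = np.split(x, 2, axis=axis)        # x must have an even length along `axis`
    return a * (1.0 / (1.0 + np.exp(-b)))   # a * sigmoid(b)

x = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]], dtype=np.float32)
print(glu_ref(x))
# [[0.0574... 0.1197...]
#  [0.3340... 0.4139...]]
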
@@ -5455,19 +6326,19 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
5455
6326
  It is :math:`x` in the above formula.
5456
6327
  target (Tensor): Ground truth labels, with shape :math:`(N,)`. Data type only support int64. The
5457
6328
  value of target should be non-negative, less than C. It is :math:`y` in the above formula.
5458
- p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: 1.
5459
- margin (int, optional): A parameter to change pairwise distance. Default: 1.
6329
+ p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: ``1`` .
6330
+ margin (int, optional): A parameter to change pairwise distance. Default: ``1`` .
5460
6331
  weight (Tensor, optional): The rescaling weight to each class with shape :math:`(C,)`. Data type only
5461
- support float16, float32 or float64. Default: None.
5462
- reduction (str, optional): Apply specific reduction method to the output: 'none', 'mean',
5463
- 'sum'. Default: 'mean'.
6332
+ support float16, float32 or float64. Default: ``None`` .
6333
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6334
+ ``'sum'`` . Default: ``'mean'`` .
5464
6335
 
5465
- - 'none': no reduction will be applied.
5466
- - 'mean': the sum of the output will be divided by the number of elements in the output.
5467
- - 'sum': the output will be summed.
6336
+ - ``'none'``: no reduction will be applied.
6337
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
6338
+ - ``'sum'``: the output elements will be summed.
5468
6339
 
5469
6340
  Returns:
5470
- Tensor. If `reduction` is 'none', returns a Tensor with the same shape as `target`.
6341
+ Tensor. If `reduction` is ``'none'``, returns a Tensor with the same shape as `target`.
5471
6342
  Otherwise, it is a scalar.
5472
6343
 
5473
6344
  Raises:
@@ -5480,12 +6351,15 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
5480
6351
  ValueError: If `reduction` is not one of {'none','sum','mean'}.
5481
6352
  ValueError: If shape[0] of `input` is not equal to shape[0] of `target`.
5482
6353
  ValueError: If shape[1] of `input` is not equal to shape[0] of `weight`.
5483
- ValueError: If rank of `weight` is not 1 or rank of `target` is not 1 or `input` is not 2.
6354
+ ValueError: If rank of `weight` is not 1, rank of `target` is not 1, or rank of `input` is not 2.
5484
6355
 
5485
6356
  Supported Platforms:
5486
6357
  ``Ascend`` ``GPU`` ``CPU``
5487
6358
 
5488
6359
  Examples:
6360
+ >>> import mindspore
6361
+ >>> import numpy as np
6362
+ >>> from mindspore import Tensor, ops
5489
6363
  >>> inputs = Tensor(np.ones(shape=[3, 3]), mindspore.float32)
5490
6364
  >>> target = Tensor(np.array([1, 2, 1]), mindspore.int64)
5491
6365
  >>> weight = Tensor(np.array([1, 1, 1]), mindspore.float32)
@@ -5528,16 +6402,17 @@ def multilabel_margin_loss(input, target, reduction='mean'):
5528
6402
  is the batch size and :math:`C` is the number of classes. Data type must be float16 or float32.
5529
6403
  target (Tensor): Ground truth data, with the same shape as `input`, data type must be int32 and
5530
6404
  label targets padded by -1.
5531
- reduction (str, optional): Apply specific reduction method to the output: 'none', 'mean',
5532
- 'sum'. Default: 'mean'.
6405
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6406
+ ``'sum'`` . Default: ``'mean'`` .
5533
6407
 
5534
- - 'none': no reduction will be applied.
5535
- - 'mean': the sum of the output will be divided by the number of elements in the output.
5536
- - 'sum': the output will be summed.
6408
+ - ``'none'``: no reduction will be applied.
6409
+ - ``'mean'``: compute and return the mean of elements in the output.
6410
+ - ``'sum'``: the output elements will be summed.
5537
6411
 
5538
6412
  Returns:
5539
- - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is "none", its shape
5540
- is :math:`(N)`. Otherwise, a scalar value will be returned.
6413
+ - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss.
6414
+ If `reduction` is ``"none"``, its shape is :math:`(N)`.
6415
+ Otherwise, a scalar value will be returned.
5541
6416
 
5542
6417
  Raises:
5543
6418
  TypeError: If `input` or `target` is not a Tensor.
@@ -5545,12 +6420,15 @@ def multilabel_margin_loss(input, target, reduction='mean'):
5545
6420
  TypeError: If dtype of `target` is not int32.
5546
6421
  ValueError: If length of shape of `input` is neither 1 nor 2.
5547
6422
  ValueError: If shape of `input` is not the same as `target`.
5548
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
6423
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
5549
6424
 
5550
6425
  Supported Platforms:
5551
6426
  ``Ascend`` ``GPU``
5552
6427
 
5553
6428
  Examples:
6429
+ >>> import mindspore
6430
+ >>> import numpy as np
6431
+ >>> from mindspore import Tensor, ops
5554
6432
  >>> inputs = Tensor(np.array([[0.1, 0.2, 0.4, 0.8], [0.2, 0.3, 0.5, 0.7]]), mindspore.float32)
5555
6433
  >>> target = Tensor(np.array([[1, 2, 0, 3], [2, 3, -1, 1]]), mindspore.int32)
5556
6434
  >>> output = ops.multilabel_margin_loss(inputs, target)
@@ -5585,15 +6463,19 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
5585
6463
  multiply to the loss of each class if given.
5586
6464
 
5587
6465
  Args:
5588
- input (Tensor): A tensor of shape (N, C), where N is batch size and C is number of classes.
6466
+ input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
5589
6467
  target (Tensor): The label target Tensor which has the same shape as `input`.
5590
- weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: None.
5591
- reduction (str): Specifies which reduction to be applied to the output. It must be one of
5592
- 'none', 'mean', and 'sum', meaning no reduction, reduce mean and sum on output, respectively.
5593
- Default: 'mean'.
6468
+ weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: ``None``.
6469
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6470
+ ``'sum'`` . Default: ``'mean'`` .
6471
+
6472
+ - ``'none'``: no reduction will be applied.
6473
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
6474
+ - ``'sum'``: the output elements will be summed.
5594
6475
 
5595
6476
  Returns:
5596
- Tensor, the data type is the same as input, if the reduction is 'none', its shape is (N), otherwise it is zero.
6477
+ Tensor, the data type is the same as input, if the `reduction` is ``'none'``,
6478
+ its shape is :math:`(N)` , otherwise a scalar value will be returned.
5597
6479
 
5598
6480
  Raises:
5599
6481
  ValueError: If the rank of `input` or `target` is not 2.
@@ -5602,6 +6484,7 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
5602
6484
  ``Ascend`` ``GPU`` ``CPU``
5603
6485
 
5604
6486
  Examples:
6487
+ >>> from mindspore import Tensor, ops
5605
6488
  >>> input = Tensor([[0.3, 0.6, 0.6], [0.9, 0.4, 0.2]])
5606
6489
  >>> target = Tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 1.0]])
5607
6490
  >>> loss = ops.multilabel_soft_margin_loss(input, target, reduction='mean')
@@ -5659,7 +6542,7 @@ def elu(input_x, alpha=1.0):
5659
6542
  Args:
5660
6543
  input_x (Tensor): The input of ELU is a Tensor of any dimension with data type of float16 or float32.
5661
6544
  alpha (float, optional): The alpha value of ELU, the data type is float. Only support '1.0' currently.
5662
- Default: 1.0.
6545
+ Default: ``1.0`` .
5663
6546
 
5664
6547
  Returns:
5665
6548
  Tensor, has the same shape and data type as `input_x`.
@@ -5673,6 +6556,9 @@ def elu(input_x, alpha=1.0):
5673
6556
  ``Ascend`` ``GPU`` ``CPU``
5674
6557
 
5675
6558
  Examples:
6559
+ >>> import mindspore
6560
+ >>> import numpy as np
6561
+ >>> from mindspore import Tensor, ops
5676
6562
  >>> x = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
5677
6563
  >>> output = ops.elu(x)
5678
6564
  >>> print(output)
@@ -5701,12 +6587,12 @@ def gelu(input_x, approximate='none'):
5701
6587
  When `approximate` argument is `tanh`, GeLU is estimated with:
5702
6588
 
5703
6589
  .. math::
5704
- GELU(x_i) = 0.5 * x_i * (1 + tanh(\sqrt(2 / \pi) * (x_i + 0.044715 * x_i^3)))
6590
+ GELU(x_i) = 0.5 * x_i * (1 + \tanh(\sqrt{2 / \pi} * (x_i + 0.044715 * x_i^3)))
5705
6591
 
5706
6592
  Args:
5707
6593
  input_x (Tensor): The input of the activation function GeLU, the data type is float16, float32 or float64.
5708
- approximate (str): the gelu approximation algorithm to use. Acceptable vaslues are 'none' and 'tanh'.
5709
- Default: 'none'.
6594
+ approximate (str): the gelu approximation algorithm to use. Acceptable values are ``'none'`` and ``'tanh'`` .
6595
+ Default: ``'none'`` .
5710
6596
 
5711
6597
  Returns:
5712
6598
  Tensor, with the same type and shape as `input_x`.
@@ -5720,6 +6606,8 @@ def gelu(input_x, approximate='none'):
5720
6606
  ``Ascend`` ``GPU`` ``CPU``
5721
6607
 
5722
6608
  Examples:
6609
+ >>> import mindspore
6610
+ >>> from mindspore import Tensor, ops
5723
6611
  >>> x = Tensor([1.0, 2.0, 3.0], mindspore.float32)
5724
6612
  >>> result = ops.gelu(x)
5725
6613
  >>> print(result)
@@ -5728,21 +6616,24 @@ def gelu(input_x, approximate='none'):
5728
6616
  if approximate not in ['none', 'tanh']:
5729
6617
  raise ValueError("For ops.gelu, approximate value should be either 'none' or 'tanh'.")
5730
6618
 
5731
- output = _get_cache_prim(P.GeLU)()(input_x)
5732
-
6619
+ x_dtype = _get_cache_prim(P.DType)()(input_x)
6620
+ if x_dtype not in [mstype.float16, mstype.float32, mstype.float64]:
6621
+ raise TypeError(f"For gelu, the input dtype must be float16, float32 or float64, "
6622
+ f"but got {x_dtype}.")
5733
6623
  if approximate == 'tanh':
5734
- output = _get_cache_prim(P.Pow)()(input_x, Tensor([3]))
5735
- output = output * Tensor([0.044715]) + input_x
5736
- output = output * _get_cache_prim(P.Sqrt)()(Tensor(2.0 / pi))
5737
- output = _get_cache_prim(P.Tanh)()(output) + Tensor([1.0])
5738
- output = output * input_x * Tensor([0.5])
6624
+ output = _get_cache_prim(P.GeLU)()(input_x)
6625
+ else:
6626
+ output = _get_cache_prim(P.Sqrt)()(Tensor(2.0, x_dtype))
6627
+ output = _get_cache_prim(P.Div)()(input_x, output)
6628
+ output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0, x_dtype)
6629
+ output = input_x * output * Tensor(0.5, x_dtype)
5739
6630
 
5740
6631
  return output
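
For reference, the two forms selected by `approximate` can be written out in plain NumPy: the erf expression mirrors what the else branch computes, while the tanh expression is the approximation quoted in the docstring, which the `GeLU` primitive is assumed here to implement. Sketch only, not mindspore code.

import numpy as np
from math import erf, sqrt, pi

def gelu_exact(x):
    # 0.5 * x * (1 + erf(x / sqrt(2)))  -- the approximate='none' form
    return np.array([0.5 * v * (1.0 + erf(v / sqrt(2.0))) for v in x])

def gelu_tanh(x):
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3)))  -- the tanh approximation
    return 0.5 * x * (1.0 + np.tanh(sqrt(2.0 / pi) * (x + 0.044715 * x ** 3)))

x = np.array([1.0, 2.0, 3.0])
print(gelu_exact(x))  # approximately [0.8413 1.9545 2.9960]
print(gelu_tanh(x))   # very close:   [0.8412 1.9546 2.9964]
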
5741
6632
 
5742
6633
 
5743
6634
  def channel_shuffle(x, groups):
5744
6635
  r"""
5745
- Divide the channels in a tensor of shape :math:`(*, C, H, W)` into g groups and
6636
+ Divide the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` groups and
5746
6637
  rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while keeping the original tensor shapes.
5747
6638
 
5748
6639
  Args:
@@ -5764,6 +6655,9 @@ def channel_shuffle(x, groups):
5764
6655
  ``Ascend`` ``CPU``
5765
6656
 
5766
6657
  Examples:
6658
+ >>> import mindspore
6659
+ >>> import numpy as np
6660
+ >>> from mindspore import Tensor, ops
5767
6661
  >>> group = 2
5768
6662
  >>> x = Tensor(np.arange(1* 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16))
5769
6663
  >>> y = mindspore.ops.channel_shuffle(x, group)
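
The shuffle described above is a reshape into (groups, C/groups), a swap of those two channel factors, and a reshape back. A NumPy sketch for the 4-D case (illustrative only; `channel_shuffle_ref` is not a mindspore helper):

import numpy as np

def channel_shuffle_ref(x, groups):
    n, c, h, w = x.shape
    assert c % groups == 0
    x = x.reshape(n, groups, c // groups, h, w)
    x = x.transpose(0, 2, 1, 3, 4)  # swap the group and per-group channel axes
    return x.reshape(n, c, h, w)

x = np.arange(1 * 4 * 2 * 2).reshape(1, 4, 2, 2).astype(np.int16)
print(channel_shuffle_ref(x, 2)[0, :, 0, 0])  # [ 0  8  4 12]: channels reordered as 0, 2, 1, 3
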
@@ -5813,8 +6707,8 @@ def lp_pool1d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
5813
6707
 
5814
6708
  kernel_size (int): The size of kernel window.
5815
6709
  stride (int): The distance of kernel moving, an int number that represents
5816
- the width of movement is stride, if the value is None, the default value `kernel_size` is used;
5817
- ceil_mode (bool): Whether to use ceil or floor to calculate output shape. Default: False.
6710
+ the width of movement is stride. Default: ``None`` , which indicates the moving step is `kernel_size` .
6711
+ ceil_mode (bool): Whether to use ceil or floor to calculate output shape. Default: ``False`` .
5818
6712
 
5819
6713
  Returns:
5820
6714
  - **output** (Tensor) - LPPool1d result, with shape :math:`(N, C, L_{out})` or :math:`(C, L_{out})`,
@@ -5894,9 +6788,9 @@ def lp_pool2d(x, norm_type, kernel_size, stride=None, ceil_mode=False):
5894
6788
  or a tuple of two int numbers that represent height and width respectively.
5895
6789
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
5896
6790
  the height and width of movement are both strides, or a tuple of two int numbers that
5897
- represent height and width of movement respectively, if the value is None,
5898
- the default value `kernel_size` is used.
5899
- ceil_mode (bool): Whether to use ceil or floor to calculate output shape. Default: False.
6791
+ represent height and width of movement respectively.
6792
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
6793
+ ceil_mode (bool): Whether to use ceil or floor to calculate output shape. Default: ``False`` .
5900
6794
 
5901
6795
  Returns:
5902
6796
  - **output** (Tensor) - LPPool2d result, with shape :math:`(N, C, H_{in}, W_{in})`,
@@ -5970,21 +6864,28 @@ def mse_loss(input, target, reduction='mean'):
5970
6864
  target (Tensor): The input label. Tensor of any dimension, same shape as the `input` in common cases.
5971
6865
  However, it supports that the shape of `input` is different from the shape of `target`
5972
6866
  and they should be broadcasted to each other.
5973
- reduction (str, optional): Type of reduction to be applied to loss.
5974
- The optional values are "mean", "none" and "sum". Default: "mean".
6867
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
6868
+ ``'sum'`` . Default: ``'mean'`` .
6869
+
6870
+ - ``'none'``: no reduction will be applied.
6871
+ - ``'mean'``: compute and return the mean of elements in the output.
6872
+ - ``'sum'``: the output elements will be summed.
5975
6873
 
5976
6874
  Returns:
5977
- Tensor, loss of type float, the shape is zero if `reduction` is 'mean' or 'sum',
5978
- while the shape of output is the broadcasted shape if `reduction` is 'none'.
6875
+ Tensor, loss of type float, the shape is zero if `reduction` is ``'mean'`` or ``'sum'`` ,
6876
+ while the shape of output is the broadcasted shape if `reduction` is ``'none'`` .
5979
6877
 
5980
6878
  Raises:
5981
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
6879
+ ValueError: If `reduction` is not one of ``'none'`` , ``'mean'`` or ``'sum'``.
5982
6880
  ValueError: If `input` and `target` have different shapes and cannot be broadcasted.
5983
6881
 
5984
6882
  Supported Platforms:
5985
6883
  ``Ascend`` ``GPU`` ``CPU``
5986
6884
 
5987
6885
  Examples:
6886
+ >>> import mindspore
6887
+ >>> import numpy as np
6888
+ >>> from mindspore import Tensor, ops
5988
6889
  >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
5989
6890
  >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32)
5990
6891
  >>> output = ops.mse_loss(logits, labels, reduction='none')
@@ -6000,7 +6901,11 @@ def mse_loss(input, target, reduction='mean'):
6000
6901
  raise ValueError("For ops.mse_loss, `reduction` value should be either 'mean', 'none' or 'sum'.")
6001
6902
 
6002
6903
  x = _get_cache_prim(P.Square)()(input - target)
6003
- input_dtype = x.dtype
6904
+ float_type = (mstype.float16, mstype.float32, mstype.float64)
6905
+ if x.dtype not in float_type:
6906
+ input_dtype = mstype.float32
6907
+ else:
6908
+ input_dtype = x.dtype
6004
6909
  x = _get_cache_prim(P.Cast)()(x, mstype.float32)
6005
6910
 
6006
6911
  average_flag = True
@@ -6010,15 +6915,11 @@ def mse_loss(input, target, reduction='mean'):
6010
6915
  if reduction == 'none':
6011
6916
  reduce_flag = False
6012
6917
 
6013
- perm = _get_cache_prim(P.Range)()(Tensor(0, mstype.int32),
6014
- Tensor(len(x.shape), mstype.int32),
6015
- Tensor(1, mstype.int32))
6016
-
6017
6918
  if reduce_flag and average_flag:
6018
- x = _get_cache_prim(P.ReduceMean)()(x, perm)
6919
+ x = _get_cache_prim(P.ReduceMean)()(x, _get_axis(x))
6019
6920
 
6020
6921
  if reduce_flag and not average_flag:
6021
- x = _get_cache_prim(P.ReduceSum)()(x, perm)
6922
+ x = _get_cache_prim(P.ReduceSum)()(x, _get_axis(x))
6022
6923
 
6023
6924
  return _get_cache_prim(P.Cast)()(x, input_dtype)
6024
6925
 
@@ -6066,16 +6967,20 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
  as `anchor`.
  negative (Tensor): A sample belonging to the different class from `anchor`, with the same type and shape
  as `anchor`.
- margin (float, optional): Make a margin between the positive pair and the negative pair. Default: 1.0.
- p (int, optional): The degree of norm for pairwise distance. Default: 2.
- eps (float, optional): Add small value to avoid division by zero. Default: 1e-06.
+ margin (float, optional): Make a margin between the positive pair and the negative pair. Default: ``1.0`` .
+ p (int, optional): The degree of norm for pairwise distance. Default: ``2`` .
+ eps (float, optional): Add small value to avoid division by zero. Default: ``1e-06``.
  swap (bool, optional): The distance swap change the negative distance to the distance between positive
- sample and negative sample. Default: "False".
- reduction (str, optional): Apply specific reduction method to the output: 'none', 'mean', 'sum'.
- Default: "mean".
+ sample and negative sample. Default: ``False`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.

  Returns:
- Tensor. If `reduction` is "none", its shape is :math:`(N)`. Otherwise, a scalar value will be returned.
+ Tensor. If `reduction` is ``"none"``, its shape is :math:`(N)`. Otherwise, a scalar value will be returned.

  Raises:
  TypeError: If `anchor` or `positive` or 'negative' is not a Tensor.
@@ -6088,12 +6993,15 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
  same time.
  ValueError: If the dimension of input `anchor` or `positive` or `negative` is bigger than or equal to 8.
  ValueError: If shape of `anchor`, `positive` and `negative` cannot broadcast.
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

  Supported Platforms:
  ``GPU``

  Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
  >>> anchor = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
  >>> positive = Tensor(np.array([[0.4, 0.6], [0.4, 0.6]]), mindspore.float32)
  >>> negative = Tensor(np.array([[0.2, 0.9], [0.3, 0.7]]), mindspore.float32)
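
The parameters documented above plug into the usual triplet formulation, loss = max(d(anchor, positive) - d(anchor, negative) + margin, 0), with `swap` substituting the positive-to-negative distance when it is smaller. A hedged NumPy sketch of that formulation (illustrative only; `triplet_margin_loss_sketch` is a made-up name and the exact `eps` handling in MindSpore may differ):

import numpy as np

def triplet_margin_loss_sketch(anchor, positive, negative, margin=1.0, p=2,
                               eps=1e-6, swap=False, reduction='mean'):
    # Pairwise p-norm distances along the last axis; eps keeps the norm away from zero.
    d_ap = np.linalg.norm(anchor - positive + eps, ord=p, axis=-1)
    d_an = np.linalg.norm(anchor - negative + eps, ord=p, axis=-1)
    if swap:
        # Use the tighter of the two negative distances, as the docstring describes.
        d_pn = np.linalg.norm(positive - negative + eps, ord=p, axis=-1)
        d_an = np.minimum(d_an, d_pn)
    loss = np.maximum(d_ap - d_an + margin, 0.0)
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss  # 'none': one value per triplet, shape (N,)

anchor = np.array([[0.3, 0.7], [0.5, 0.5]])
positive = np.array([[0.4, 0.6], [0.4, 0.6]])
negative = np.array([[0.2, 0.9], [0.3, 0.7]])
print(triplet_margin_loss_sketch(anchor, positive, negative))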
@@ -6120,7 +7028,7 @@ def linear(x, w, b):
  def _inner_dropout(x, p, training):
  """inner dropout"""
  _dropout = _get_cache_prim(P.Dropout)(1 - p)
- if p > 0. and training:
+ if 0. < p <= 1. and training:
  return _dropout(x)[0]
  return x
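
The tightened guard above only applies dropout when the drop probability lies in (0, 1] and the cell is training; `P.Dropout` itself is constructed with keep_prob = 1 - p. A rough NumPy illustration of that behaviour (a sketch under those assumptions, not the primitive):

import numpy as np

_rng = np.random.default_rng(0)

def inner_dropout_sketch(x, p, training):
    if not (0. < p <= 1.) or not training:
        return x                       # no-op outside the guarded range or in eval mode
    if p == 1.:
        return np.zeros_like(x)        # everything dropped
    keep_prob = 1. - p
    mask = _rng.random(x.shape) < keep_prob
    return np.where(mask, x / keep_prob, 0.0)   # inverted-dropout scaling

x = np.ones((2, 4), dtype=np.float32)
print(inner_dropout_sketch(x, p=0.5, training=True))
print(inner_dropout_sketch(x, p=0.0, training=True))   # unchanged: p == 0 skips dropout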
@@ -6173,10 +7081,11 @@ def _in_projection_packed(q, k, v, w, b, k_is_v, q_is_k):
  return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)


- def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training):
+ def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training, dtype):
  """scaled dot product attention"""
  embed_size = query.shape[-1]
- scaling_factor = Tensor(embed_size, mstype.float32).sqrt().sqrt()
+ embed_size_tensor = scalar_to_tensor_(embed_size, dtype)
+ scaling_factor = embed_size_tensor.sqrt().sqrt()
  query = query / scaling_factor

  if is_causal:
@@ -6194,7 +7103,7 @@ def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_ca
  return (output, attn)
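
The new `dtype` argument lets the fourth-root scaling factor be created in the caller's precision instead of hard-coded float32; the mathematics is unchanged. As a rough reference, a NumPy sketch of plain scaled dot-product attention with that scaling (the same factor is presumably applied to the key in the omitted lines so the product carries the usual 1/sqrt(d); dropout and the causal branch are skipped, and all names are illustrative):

import numpy as np

def sdpa_sketch(query, key, value, attn_mask=None):
    d = query.shape[-1]
    scale = np.sqrt(np.sqrt(d))                 # fourth root of the embedding size
    scores = (query / scale) @ (key / scale).swapaxes(-1, -2)   # net effect: Q K^T / sqrt(d)
    if attn_mask is not None:
        scores = scores + attn_mask             # additive mask; -inf removes a position
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights = weights / weights.sum(axis=-1, keepdims=True)     # softmax over the key axis
    return weights @ value, weights

q = np.random.rand(2, 4, 8)
k = np.random.rand(2, 4, 8)
v = np.random.rand(2, 4, 8)
out, attn = sdpa_sketch(q, k, v)
print(out.shape, attn.shape)   # (2, 4, 8) (2, 4, 4)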

- @constexpr
+ @_primexpr
  def _check_qkv_shape(query_ndim, key_ndim, value_ndim):
  """Check the expected shape for `query, `key`, `value` and returns whether the input is batched."""
  # Shape check.
@@ -6217,7 +7126,7 @@ def _check_qkv_shape(query_ndim, key_ndim, value_ndim):
  return is_batched


- @constexpr
+ @_primexpr
  def _check_kpm_shape(query_ndim, kmp_ndim):
  """check key_padding_mask shape"""
  if query_ndim == 3:
@@ -6231,7 +7140,7 @@ def _check_kpm_shape(query_ndim, kmp_ndim):
  f"but got `key_padding_mask` with {kmp_ndim}D.")


- @constexpr
+ @_primexpr
  def _check_attn_mask_shape(query_ndim, query_shape, key_shape, attn_mask_ndim,
  attn_mask_shape, num_heads):
  """
@@ -6269,7 +7178,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
  out_proj_bias, training=True, key_padding_mask=None, attn_mask=None,
  use_separate_proj_weight=False, q_proj_weight=None, k_proj_weight=None,
  v_proj_weight=None, static_k=None, static_v=None, average_attn_weights=True,
- is_causal=False, k_is_v=False, q_is_k=False):
+ is_causal=False, k_is_v=False, q_is_k=False, dtype=mstype.float32):
  """multi head attetion forward function"""
  is_batched = _check_qkv_shape(query.ndim, key.ndim, value.ndim)
  if key_padding_mask is not None:
@@ -6413,8 +7322,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head

  if attn_mask is not None and attn_mask.dtype == mstype.bool_:
  new_attn_mask = ops.zeros_like(attn_mask, dtype=q.dtype)
- new_attn_mask.masked_fill(attn_mask, float("-inf"))
- attn_mask = new_attn_mask
+ attn_mask = new_attn_mask.masked_fill(attn_mask, float("-inf"))
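
The rewrite also fixes a silent bug: `masked_fill` returns its result rather than updating `new_attn_mask` in place, so the assignment is required for the -inf entries to survive. The intended conversion of a boolean mask into an additive float mask, shown with NumPy for clarity:

import numpy as np

# True means "do not attend"; the boolean mask becomes an additive 0 / -inf mask
# that is later added to the attention scores before softmax.
bool_mask = np.array([[False, True],
                      [False, False]])
additive_mask = np.where(bool_mask, float("-inf"), 0.0)
print(additive_mask)
# [[  0. -inf]
#  [  0.   0.]]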

  if attn_mask is not None:
  if attn_mask.shape[0] == 1:
@@ -6427,7 +7335,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
  v = v.view((bsz, num_heads, src_len, head_dim))

  attn_output, attn_output_weights = _scaled_dot_product_attention(
- q, k, v, attn_mask, dropout_p, is_causal, training)
+ q, k, v, attn_mask, dropout_p, is_causal, training, dtype)
  attn_output = attn_output.transpose(2, 0, 1, 3).view((bsz * tgt_len, embed_dim))

  attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
@@ -6443,11 +7351,11 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
  return attn_output, attn_output_weights


- def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False):
+ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False):
  r"""
  Performs a 2D max pooling on the input Tensor.

- Typically the input is a Tensor with shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, outputs
+ Typically, the input is a Tensor with shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, outputs
  regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given `kernel_size`
  :math:`ks = (h_{ker}, w_{ker})` and `stride` :math:`s = (s_0, s_1)`, the operation is as follows:
@@ -6458,27 +7366,38 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal

  Args:
  x (Tensor): Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of int8,
- int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
+ int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32 or float64 in CPU or GPU,
+ while on Ascend only uint16 is supported.
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg
  value, is an int number that represents height and width of the kernel, or a tuple of
  two int numbers that represent height and width respectively.
  stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
  the height and width of movement are both stride, or a tuple of two int numbers that
- represent height and width of movement respectively. Default: `kernel_size`.
+ represent height and width of movement respectively.
+ Default: ``None`` , which indicates the moving step is `kernel_size` .
  padding (Union[int, tuple[int]]): An int number that represents the height and width of movement are both
  strides, or a tuple of two int numbers that represent height and width of movement respectively.
- Default: 0.
- dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: 1.
- ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: False.
- return_indices (bool): Whether to output the indices of max value. Default: False.
+ Default: ``0`` .
+ dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: ``1`` .
+ return_indices (bool): Whether to output the indices of max value. Default: ``False`` .
+ ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` .

  Returns:
- If `return_indices` is False, return a Tensor `output`, else return a tuple (`output`, `argmax`).
+ If `return_indices` is ``False`` , return a Tensor `output`, else return a tuple (`output`, `argmax`).

  - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`.
  It has the same data type as `x`.
- - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be return
- only when `return_indices` is True.
+
+ .. math::
+ H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]}
+ \times (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor
+
+ .. math::
+ W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]}
+ \times (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor
+
+ - **argmax** (Tensor) - Index corresponding to the maximum value. On CPU and GPU the data type is int64,
+ while on Ascend it is uint16. It is returned only when `return_indices` is True.

  Raises:
  TypeError: If `x` is not a Tensor.
@@ -6486,12 +7405,16 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
  TypeError: If `kernel_size` , `stride` , `padding` or `dilation` is not int or tuple.
  ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
  ValueError: If `padding` is less than 0.
- TypeError: If `ceil_mode` is not bool
+ ValueError: If `padding` is more than half of `kernel_size`.
+ TypeError: If `ceil_mode` is not bool.

  Supported Platforms:
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32)
  >>> output_tensor, argmax = ops.max_pool2d(x, kernel_size=(3, 2), stride=(2, 1), return_indices=True)
  >>> print(output_tensor.shape)
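
The printed shape follows from the `H_{out}`/`W_{out}` formulas added to the docstring above. A quick plain-Python check under the example's settings (padding=0, dilation=1, floor mode; the helper name is made up):

import math

def max_pool2d_out_size(size, kernel, stride, padding=0, dilation=1, ceil_mode=False):
    # Direct transcription of the H_out / W_out formula above.
    val = (size + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1
    return math.ceil(val) if ceil_mode else math.floor(val)

# Docstring example: input (20, 16, 50, 32), kernel_size=(3, 2), stride=(2, 1).
h_out = max_pool2d_out_size(50, kernel=3, stride=2)
w_out = max_pool2d_out_size(32, kernel=2, stride=1)
print(h_out, w_out)   # 24 31, i.e. an output of shape (20, 16, 24, 31)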
@@ -6520,6 +7443,7 @@ __all__ = [
  'avg_pool3d',
  'batch_norm',
  'bias_add',
+ 'bidense',
  'binary_cross_entropy',
  'binary_cross_entropy_with_logits',
  'cosine_embedding_loss',
@@ -6527,6 +7451,7 @@ __all__ = [
  'max_pool3d',
  'kl_div',
  'celu',
+ 'dense',
  'deformable_conv2d',
  'dropout1d',
  'dropout2d',
@@ -6553,8 +7478,10 @@ __all__ = [
  'softsign',
  'softshrink',
  'soft_shrink',
+ 'softplus',
  'selu',
  'silu',
+ 'soft_margin_loss',
  'softmax',
  'softmin',
  'pdist',