mindspore-2.1.0-cp38-cp38-win_amd64.whl → mindspore-2.2.11-cp38-cp38-win_amd64.whl

This diff shows the contents of two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.
Files changed (511)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +4 -1
  5. mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
  8. mindspore/_check_jit_forbidden_api.py +3 -1
  9. mindspore/_checkparam.py +23 -29
  10. mindspore/_extends/graph_kernel/__init__.py +0 -1
  11. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  12. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  13. mindspore/_extends/graph_kernel/splitter.py +4 -11
  14. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
  15. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
  16. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  17. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  18. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
  19. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
  20. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  21. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  22. mindspore/_extends/parse/__init__.py +13 -15
  23. mindspore/_extends/parse/namespace.py +7 -33
  24. mindspore/_extends/parse/parser.py +67 -72
  25. mindspore/_extends/parse/resources.py +1 -1
  26. mindspore/_extends/parse/standard_method.py +86 -106
  27. mindspore/_extends/parse/trope.py +1 -1
  28. mindspore/_extends/remote/kernel_build_server.py +25 -7
  29. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  30. mindspore/_install_custom.py +43 -0
  31. mindspore/amp.py +47 -11
  32. mindspore/atlprov.dll +0 -0
  33. mindspore/boost/boost.py +1 -8
  34. mindspore/boost/boost_cell_wrapper.py +3 -2
  35. mindspore/boost/grad_accumulation.py +1 -1
  36. mindspore/boost/group_loss_scale_manager.py +8 -7
  37. mindspore/c1.dll +0 -0
  38. mindspore/c1xx.dll +0 -0
  39. mindspore/c2.dll +0 -0
  40. mindspore/common/__init__.py +5 -3
  41. mindspore/common/_jit_fallback_utils.py +6 -0
  42. mindspore/common/_register_for_adapter.py +2 -0
  43. mindspore/common/_register_for_tensor.py +2 -2
  44. mindspore/common/_stub_tensor.py +13 -0
  45. mindspore/common/_utils.py +29 -0
  46. mindspore/common/api.py +174 -259
  47. mindspore/common/auto_dynamic_shape.py +494 -0
  48. mindspore/common/dtype.py +18 -11
  49. mindspore/common/dump.py +6 -4
  50. mindspore/common/initializer.py +14 -14
  51. mindspore/common/jit_config.py +33 -15
  52. mindspore/common/lazy_inline.py +126 -7
  53. mindspore/common/mindir_util.py +101 -0
  54. mindspore/common/parameter.py +51 -41
  55. mindspore/common/seed.py +4 -4
  56. mindspore/common/sparse_tensor.py +13 -14
  57. mindspore/common/tensor.py +243 -165
  58. mindspore/communication/__init__.py +7 -4
  59. mindspore/communication/_comm_helper.py +83 -4
  60. mindspore/communication/management.py +152 -84
  61. mindspore/config/op_info.config +14 -3
  62. mindspore/context.py +152 -61
  63. mindspore/dataset/__init__.py +5 -5
  64. mindspore/dataset/audio/__init__.py +2 -2
  65. mindspore/dataset/audio/transforms.py +52 -52
  66. mindspore/dataset/callback/ds_callback.py +16 -2
  67. mindspore/dataset/core/config.py +68 -51
  68. mindspore/dataset/engine/cache_client.py +33 -7
  69. mindspore/dataset/engine/datasets.py +250 -112
  70. mindspore/dataset/engine/datasets_audio.py +43 -211
  71. mindspore/dataset/engine/datasets_standard_format.py +16 -35
  72. mindspore/dataset/engine/datasets_text.py +43 -67
  73. mindspore/dataset/engine/datasets_user_defined.py +86 -100
  74. mindspore/dataset/engine/datasets_vision.py +219 -1029
  75. mindspore/dataset/engine/iterators.py +11 -4
  76. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
  77. mindspore/dataset/engine/obs/util.py +3 -0
  78. mindspore/dataset/engine/samplers.py +1 -1
  79. mindspore/dataset/engine/validators.py +19 -5
  80. mindspore/dataset/text/__init__.py +3 -3
  81. mindspore/dataset/text/transforms.py +101 -127
  82. mindspore/dataset/text/utils.py +205 -138
  83. mindspore/dataset/transforms/__init__.py +1 -1
  84. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  85. mindspore/dataset/transforms/transforms.py +95 -40
  86. mindspore/dataset/utils/browse_dataset.py +8 -2
  87. mindspore/dataset/utils/line_reader.py +17 -19
  88. mindspore/dataset/vision/__init__.py +3 -3
  89. mindspore/dataset/vision/c_transforms.py +6 -3
  90. mindspore/dataset/vision/transforms.py +409 -287
  91. mindspore/dataset/vision/utils.py +13 -14
  92. mindspore/dataset/vision/validators.py +11 -1
  93. mindspore/dnnl.dll +0 -0
  94. mindspore/dpcmi.dll +0 -0
  95. mindspore/experimental/map_parameter.py +14 -0
  96. mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
  97. mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
  98. mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
  99. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  100. mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
  101. mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
  102. mindspore/gen_ops.py +273 -0
  103. mindspore/include/OWNERS +0 -1
  104. mindspore/include/api/data_type.h +2 -1
  105. mindspore/include/api/graph.h +0 -15
  106. mindspore/include/api/kernel.h +2 -0
  107. mindspore/include/api/kernel_api.h +37 -12
  108. mindspore/include/api/model.h +17 -14
  109. mindspore/include/api/status.h +8 -3
  110. mindspore/include/api/types.h +37 -4
  111. mindspore/include/c_api/ms/abstract.h +67 -0
  112. mindspore/include/c_api/ms/attribute.h +197 -0
  113. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  114. mindspore/include/c_api/ms/base/macros.h +32 -0
  115. mindspore/include/c_api/ms/base/status.h +33 -0
  116. mindspore/include/c_api/ms/base/types.h +282 -0
  117. mindspore/include/c_api/ms/context.h +102 -0
  118. mindspore/include/c_api/ms/graph.h +160 -0
  119. mindspore/include/c_api/ms/node.h +606 -0
  120. mindspore/include/c_api/ms/tensor.h +161 -0
  121. mindspore/include/c_api/ms/value.h +84 -0
  122. mindspore/include/dataset/constants.h +6 -5
  123. mindspore/include/dataset/execute.h +23 -13
  124. mindspore/include/dataset/text.h +26 -26
  125. mindspore/include/dataset/transforms.h +13 -13
  126. mindspore/include/dataset/vision.h +60 -60
  127. mindspore/include/dataset/vision_ascend.h +5 -6
  128. mindspore/include/dataset/vision_lite.h +17 -17
  129. mindspore/jpeg62.dll +0 -0
  130. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  131. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  132. mindspore/mindspore_backend.dll +0 -0
  133. mindspore/mindspore_common.dll +0 -0
  134. mindspore/mindspore_core.dll +0 -0
  135. mindspore/mindspore_glog.dll +0 -0
  136. mindspore/mindspore_shared_lib.dll +0 -0
  137. mindspore/msobj140.dll +0 -0
  138. mindspore/mspdb140.dll +0 -0
  139. mindspore/mspdbcore.dll +0 -0
  140. mindspore/mspdbst.dll +0 -0
  141. mindspore/mspft140.dll +0 -0
  142. mindspore/msvcdis140.dll +0 -0
  143. mindspore/msvcp140_1.dll +0 -0
  144. mindspore/msvcp140_2.dll +0 -0
  145. mindspore/msvcp140_atomic_wait.dll +0 -0
  146. mindspore/msvcp140_codecvt_ids.dll +0 -0
  147. mindspore/nn/__init__.py +0 -2
  148. mindspore/nn/cell.py +313 -74
  149. mindspore/nn/dynamic_lr.py +21 -21
  150. mindspore/nn/layer/activation.py +22 -30
  151. mindspore/nn/layer/basic.py +15 -13
  152. mindspore/nn/layer/channel_shuffle.py +1 -1
  153. mindspore/nn/layer/container.py +271 -9
  154. mindspore/nn/layer/conv.py +323 -204
  155. mindspore/nn/layer/dense.py +8 -5
  156. mindspore/nn/layer/embedding.py +33 -27
  157. mindspore/nn/layer/flash_attention.py +61 -95
  158. mindspore/nn/layer/image.py +8 -6
  159. mindspore/nn/layer/math.py +16 -25
  160. mindspore/nn/layer/normalization.py +107 -66
  161. mindspore/nn/layer/padding.py +1 -1
  162. mindspore/nn/layer/pooling.py +131 -109
  163. mindspore/nn/layer/rnn_cells.py +27 -22
  164. mindspore/nn/layer/rnns.py +13 -16
  165. mindspore/nn/layer/thor_layer.py +1 -1
  166. mindspore/nn/layer/transformer.py +221 -154
  167. mindspore/nn/learning_rate_schedule.py +9 -1
  168. mindspore/nn/loss/loss.py +235 -174
  169. mindspore/nn/optim/ada_grad.py +2 -1
  170. mindspore/nn/optim/adadelta.py +1 -0
  171. mindspore/nn/optim/adafactor.py +2 -1
  172. mindspore/nn/optim/adam.py +7 -4
  173. mindspore/nn/optim/adamax.py +3 -2
  174. mindspore/nn/optim/adasum.py +2 -2
  175. mindspore/nn/optim/asgd.py +2 -3
  176. mindspore/nn/optim/ftrl.py +6 -5
  177. mindspore/nn/optim/lamb.py +7 -4
  178. mindspore/nn/optim/lars.py +1 -1
  179. mindspore/nn/optim/lazyadam.py +5 -3
  180. mindspore/nn/optim/momentum.py +2 -1
  181. mindspore/nn/optim/optimizer.py +53 -4
  182. mindspore/nn/optim/proximal_ada_grad.py +3 -4
  183. mindspore/nn/optim/rmsprop.py +4 -3
  184. mindspore/nn/optim/rprop.py +23 -12
  185. mindspore/nn/optim/sgd.py +26 -11
  186. mindspore/nn/optim/thor.py +9 -7
  187. mindspore/nn/probability/bijector/bijector.py +5 -5
  188. mindspore/nn/probability/bijector/power_transform.py +27 -27
  189. mindspore/nn/probability/bijector/softplus.py +3 -3
  190. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
  191. mindspore/nn/probability/distribution/bernoulli.py +5 -5
  192. mindspore/nn/probability/distribution/beta.py +3 -3
  193. mindspore/nn/probability/distribution/categorical.py +7 -7
  194. mindspore/nn/probability/distribution/cauchy.py +0 -1
  195. mindspore/nn/probability/distribution/distribution.py +3 -3
  196. mindspore/nn/probability/distribution/gamma.py +3 -3
  197. mindspore/nn/probability/distribution/geometric.py +4 -4
  198. mindspore/nn/probability/distribution/gumbel.py +4 -4
  199. mindspore/nn/probability/distribution/log_normal.py +2 -2
  200. mindspore/nn/probability/distribution/logistic.py +2 -2
  201. mindspore/nn/probability/distribution/poisson.py +4 -4
  202. mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
  203. mindspore/nn/probability/distribution/uniform.py +6 -6
  204. mindspore/nn/wrap/__init__.py +4 -2
  205. mindspore/nn/wrap/cell_wrapper.py +87 -34
  206. mindspore/nn/wrap/grad_reducer.py +8 -5
  207. mindspore/nn/wrap/loss_scale.py +105 -42
  208. mindspore/numpy/array_creations.py +1 -2
  209. mindspore/numpy/array_ops.py +3 -2
  210. mindspore/numpy/utils_const.py +5 -5
  211. mindspore/opencv_core452.dll +0 -0
  212. mindspore/opencv_imgcodecs452.dll +0 -0
  213. mindspore/opencv_imgproc452.dll +0 -0
  214. mindspore/ops/_grad_experimental/__init__.py +0 -5
  215. mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
  216. mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
  217. mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
  218. mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
  219. mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
  220. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
  221. mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
  222. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  223. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  224. mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
  225. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  226. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  227. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  228. mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
  229. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  230. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  231. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  232. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  233. mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
  234. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  235. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  236. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  237. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  238. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  239. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  240. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  241. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
  242. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
  243. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  244. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  245. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  246. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  247. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  248. mindspore/ops/_op_impl/tbe/__init__.py +4 -4
  249. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  250. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  251. mindspore/ops/_primitive_cache.py +1 -1
  252. mindspore/ops/_tracefunc.py +45 -13
  253. mindspore/ops/_utils/utils.py +6 -1
  254. mindspore/ops/_vmap/vmap_array_ops.py +3 -3
  255. mindspore/ops/_vmap/vmap_base.py +3 -3
  256. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  257. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  258. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  259. mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
  260. mindspore/ops/arg_dtype_cast.py +54 -0
  261. mindspore/ops/composite/base.py +37 -10
  262. mindspore/ops/composite/math_ops.py +5 -4
  263. mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
  264. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
  265. mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
  266. mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
  267. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  268. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  269. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  270. mindspore/ops/deprecated.py +304 -0
  271. mindspore/ops/function/__init__.py +4 -1
  272. mindspore/ops/function/array_func.py +174 -193
  273. mindspore/ops/function/clip_func.py +81 -13
  274. mindspore/ops/function/debug_func.py +1 -1
  275. mindspore/ops/function/grad/grad_func.py +18 -9
  276. mindspore/ops/function/image_func.py +10 -4
  277. mindspore/ops/function/linalg_func.py +5 -5
  278. mindspore/ops/function/math_func.py +575 -386
  279. mindspore/ops/function/nn_func.py +568 -260
  280. mindspore/ops/function/random_func.py +88 -57
  281. mindspore/ops/function/sparse_func.py +1 -1
  282. mindspore/ops/function/sparse_unary_func.py +14 -12
  283. mindspore/ops/function/vmap_func.py +6 -5
  284. mindspore/ops/functional.py +15 -10
  285. mindspore/ops/op_info_register.py +244 -25
  286. mindspore/ops/operations/__init__.py +31 -19
  287. mindspore/ops/operations/_grad_ops.py +71 -7
  288. mindspore/ops/operations/_inner_ops.py +350 -17
  289. mindspore/ops/operations/_quant_ops.py +4 -8
  290. mindspore/ops/operations/_sequence_ops.py +42 -0
  291. mindspore/ops/operations/array_ops.py +68 -282
  292. mindspore/ops/operations/comm_ops.py +107 -59
  293. mindspore/ops/operations/custom_ops.py +94 -70
  294. mindspore/ops/operations/debug_ops.py +8 -4
  295. mindspore/ops/operations/image_ops.py +18 -12
  296. mindspore/ops/operations/inner_ops.py +26 -3
  297. mindspore/ops/operations/math_ops.py +192 -144
  298. mindspore/ops/operations/nn_ops.py +857 -489
  299. mindspore/ops/operations/other_ops.py +0 -22
  300. mindspore/ops/operations/random_ops.py +53 -111
  301. mindspore/ops/operations/sparse_ops.py +3 -1
  302. mindspore/ops/primitive.py +24 -18
  303. mindspore/parallel/_auto_parallel_context.py +68 -8
  304. mindspore/parallel/_cost_model_context.py +2 -2
  305. mindspore/parallel/_offload_context.py +17 -3
  306. mindspore/parallel/_parallel_serialization.py +12 -5
  307. mindspore/parallel/_ps_context.py +12 -0
  308. mindspore/parallel/_tensor.py +18 -13
  309. mindspore/parallel/_transformer/layers.py +5 -3
  310. mindspore/parallel/_transformer/loss.py +1 -0
  311. mindspore/parallel/_transformer/moe.py +2 -2
  312. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  313. mindspore/parallel/_transformer/transformer.py +23 -3
  314. mindspore/parallel/_utils.py +11 -7
  315. mindspore/parallel/algo_parameter_config.py +85 -5
  316. mindspore/parallel/checkpoint_transform.py +19 -12
  317. mindspore/parallel/shard.py +21 -14
  318. mindspore/pgodb140.dll +0 -0
  319. mindspore/pgort140.dll +0 -0
  320. mindspore/profiler/common/struct_type.py +3 -3
  321. mindspore/profiler/common/util.py +4 -2
  322. mindspore/profiler/envprofiling.py +1 -1
  323. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  324. mindspore/profiler/parser/ascend_flops_generator.py +2 -2
  325. mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
  326. mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
  327. mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
  328. mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
  329. mindspore/profiler/parser/ascend_op_generator.py +6 -6
  330. mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
  331. mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
  332. mindspore/profiler/parser/base_timeline_generator.py +10 -8
  333. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
  334. mindspore/profiler/parser/flops_parser.py +15 -11
  335. mindspore/profiler/parser/framework_parser.py +38 -22
  336. mindspore/profiler/parser/hccl_parser.py +16 -12
  337. mindspore/profiler/parser/integrator.py +22 -11
  338. mindspore/profiler/parser/memory_usage_parser.py +2 -2
  339. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  340. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  341. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  342. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  343. mindspore/profiler/parser/optime_parser.py +1 -1
  344. mindspore/profiler/parser/profiler_info.py +21 -2
  345. mindspore/profiler/parser/step_trace_parser.py +11 -14
  346. mindspore/profiler/profiling.py +179 -89
  347. mindspore/rewrite/api/node.py +102 -19
  348. mindspore/rewrite/api/node_type.py +5 -1
  349. mindspore/rewrite/api/pattern_engine.py +1 -1
  350. mindspore/rewrite/api/scoped_value.py +9 -17
  351. mindspore/rewrite/api/symbol_tree.py +131 -47
  352. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  353. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  354. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  355. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
  356. mindspore/rewrite/common/rewrite_elog.py +5 -1
  357. mindspore/rewrite/namer.py +33 -24
  358. mindspore/rewrite/namespace.py +14 -5
  359. mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
  360. mindspore/rewrite/node/call_function.py +79 -0
  361. mindspore/rewrite/node/cell_container.py +135 -0
  362. mindspore/rewrite/node/control_flow.py +88 -0
  363. mindspore/rewrite/{node.py → node/node.py} +273 -234
  364. mindspore/rewrite/node/node_manager.py +254 -0
  365. mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
  366. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  367. mindspore/rewrite/parsers/assign_parser.py +216 -221
  368. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  369. mindspore/rewrite/parsers/class_def_parser.py +174 -113
  370. mindspore/rewrite/parsers/constant_parser.py +9 -6
  371. mindspore/rewrite/parsers/container_parser.py +9 -7
  372. mindspore/rewrite/parsers/for_parser.py +42 -21
  373. mindspore/rewrite/parsers/function_def_parser.py +24 -16
  374. mindspore/rewrite/parsers/if_parser.py +28 -24
  375. mindspore/rewrite/parsers/module_parser.py +196 -25
  376. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  377. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  378. mindspore/rewrite/parsers/return_parser.py +6 -6
  379. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  380. mindspore/rewrite/sparsify/utils.py +1 -1
  381. mindspore/rewrite/symbol_tree.py +523 -578
  382. mindspore/rewrite/symbol_tree_builder.py +9 -193
  383. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  384. mindspore/run_check/_check_version.py +6 -4
  385. mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
  386. mindspore/safeguard/rewrite_obfuscation.py +541 -0
  387. mindspore/tbbmalloc.dll +0 -0
  388. mindspore/tinyxml2.dll +0 -0
  389. mindspore/train/_utils.py +7 -3
  390. mindspore/train/amp.py +323 -123
  391. mindspore/train/anf_ir_pb2.py +14 -2
  392. mindspore/train/callback/_backup_and_restore.py +2 -12
  393. mindspore/train/callback/_callback.py +29 -4
  394. mindspore/train/callback/_checkpoint.py +23 -8
  395. mindspore/train/callback/_early_stop.py +2 -2
  396. mindspore/train/callback/_landscape.py +4 -4
  397. mindspore/train/callback/_loss_monitor.py +2 -2
  398. mindspore/train/callback/_on_request_exit.py +2 -2
  399. mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
  400. mindspore/train/callback/_summary_collector.py +15 -8
  401. mindspore/train/callback/_time_monitor.py +58 -5
  402. mindspore/train/data_sink.py +5 -11
  403. mindspore/train/dataset_helper.py +84 -57
  404. mindspore/train/loss_scale_manager.py +2 -2
  405. mindspore/train/metrics/__init__.py +3 -3
  406. mindspore/train/metrics/cosine_similarity.py +1 -1
  407. mindspore/train/metrics/hausdorff_distance.py +3 -2
  408. mindspore/train/metrics/mean_surface_distance.py +3 -2
  409. mindspore/train/metrics/metric.py +39 -19
  410. mindspore/train/metrics/roc.py +2 -2
  411. mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
  412. mindspore/train/mind_ir_pb2.py +85 -36
  413. mindspore/train/model.py +187 -47
  414. mindspore/train/serialization.py +487 -161
  415. mindspore/train/summary/_summary_adapter.py +1 -1
  416. mindspore/train/summary/_writer_pool.py +3 -2
  417. mindspore/train/summary/summary_record.py +37 -17
  418. mindspore/train/train_thor/convert_utils.py +3 -3
  419. mindspore/train/train_thor/dataset_helper.py +1 -1
  420. mindspore/turbojpeg.dll +0 -0
  421. mindspore/vcmeta.dll +0 -0
  422. mindspore/vcruntime140.dll +0 -0
  423. mindspore/vcruntime140_1.dll +0 -0
  424. mindspore/version.py +1 -1
  425. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +7 -4
  426. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +429 -486
  427. mindspore/_extends/graph_kernel/expander.py +0 -80
  428. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
  429. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  430. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  431. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  432. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  433. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  434. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  435. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  436. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  437. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  438. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  439. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  440. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  441. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  442. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  443. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  444. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  445. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  446. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  447. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  448. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  449. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  450. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  451. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  452. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  453. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  454. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  455. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  456. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  457. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  458. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  459. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  460. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  461. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  462. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  463. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  464. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  465. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  466. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  467. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  468. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  469. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  470. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  471. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  472. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  473. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  474. mindspore/dataset/datapreprocess/__init__.py +0 -20
  475. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  476. mindspore/include/api/net.h +0 -142
  477. mindspore/nn/lr_scheduler.py +0 -262
  478. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
  479. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
  480. mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
  481. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  482. mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
  483. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  484. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
  485. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
  486. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
  487. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
  488. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
  489. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  490. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  491. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  492. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  493. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
  494. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
  495. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
  496. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  497. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  498. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
  499. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
  500. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  501. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  502. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
  503. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
  504. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
  505. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
  506. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
  507. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  508. mindspore/rewrite/node_visitor.py +0 -44
  509. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
  510. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
  511. {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
-# Copyright 2020-2022 Huawei Technologies Co., Ltd
+# Copyright 2020-2023 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -438,7 +438,10 @@ class Softmax(Primitive):
 
     Inputs:
         - **logits** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
-          additional dimensions, with float16, float32 or float64(CPU, GPU) data type.
+          additional dimensions. Supported dtypes:
+
+          - Ascend: float16, float32.
+          - GPU/CPU: float16, float32, float64.
 
     Outputs:
         Tensor, with the same type and shape as the logits.
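The recurring pattern in this series of docstring hunks is the dtype split: Ascend keeps float16/float32 while GPU/CPU also accept float64. A minimal usage sketch on the universally supported float32 path (illustrative only, not part of the diff):

import mindspore
import numpy as np
from mindspore import Tensor, ops

# Softmax over the last axis; float32 is listed for all three backends above.
softmax = ops.Softmax(axis=-1)
logits = Tensor(np.array([1.0, 2.0, 3.0, 4.0, 5.0]), mindspore.float32)
probs = softmax(logits)
print(probs.sum())  # ~1.0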
@@ -517,7 +520,11 @@ class Softplus(Primitive):
         \text{output} = \log(1 + \exp(\text{x}))
 
     Inputs:
-        - **input_x** (Tensor) - Tensor of any dimension, with float16, float32 or float64(CPU, GPU) data type.
+        - **input_x** (Tensor) - Tensor of any dimension.
+          Supported dtypes:
+
+          - GPU/CPU: float16, float32, float64.
+          - Ascend: float16, float32.
 
     Outputs:
         Tensor, with the same type and shape as the `input_x`.
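The hunk keeps the defining formula output = log(1 + exp(x)) as context, so the reworded dtype table can be sanity-checked numerically. A short illustrative sketch; np.logaddexp(0, x) is the overflow-safe form of log(1 + exp(x)):

import mindspore
import numpy as np
from mindspore import Tensor, ops

x = np.array([0.1, 0.2, 30.0, 25.0], dtype=np.float32)
out = ops.Softplus()(Tensor(x, mindspore.float32))
expected = np.logaddexp(0.0, x).astype(np.float32)  # stable log(1 + exp(x))
assert np.allclose(out.asnumpy(), expected, rtol=1e-3)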
@@ -626,7 +633,7 @@ class ReLUV3(Primitive):
     Inputs:
         - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
           additional dimensions, data type is
-          `number <https://www.mindspore.cn/docs/en/r2.1/api_python/mindspore.html#mindspore.dtype>`_.
+          `number <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.html#mindspore.dtype>`_.
 
     Outputs:
         Tensor of shape :math:`(N, *)`, with the same type and shape as the `input_x`.
@@ -659,7 +666,11 @@ class Mish(PrimitiveWithInfer):
     Refer to :func:`mindspore.ops.mish` for more details.
 
     Inputs:
-        - **x** (Tensor) - The input Tensor with float16, float32 or float64 data type.
+        - **x** (Tensor) - The input Tensor.
+          Supported dtypes:
+
+          - GPU/CPU: float16, float32, float64.
+          - Ascend: float16, float32.
 
     Outputs:
         Tensor, with the same type and shape as the `x`.
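The Mish entry now defers entirely to :func:`mindspore.ops.mish` for the definition; assuming the usual mish(x) = x * tanh(softplus(x)), an illustrative cross-check:

import mindspore
import numpy as np
from mindspore import Tensor, ops

x = np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]], dtype=np.float32)
out = ops.Mish()(Tensor(x, mindspore.float32))
expected = x * np.tanh(np.logaddexp(0.0, x))  # x * tanh(softplus(x))
assert np.allclose(out.asnumpy(), expected, rtol=1e-3)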
@@ -745,7 +756,9 @@ class ReLU6(PrimitiveWithCheck):
     Refer to :func:`mindspore.ops.relu6` for more details.
 
     Inputs:
-        - **input_x** (Tensor) - Input Tensor of float16 or float32 data type.
+        - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`,
+          where :math:`*` means any number of additional dimensions.
+          Data type must be float16, float32.
 
     Outputs:
         Tensor, with the same type and shape as the `input_x`.
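ReLU6 is a clamp to [0, 6], so the reworded input spec is easy to verify against NumPy (illustrative sketch):

import mindspore
import numpy as np
from mindspore import Tensor, ops

x = np.array([-1.0, 0.5, 3.0, 7.5], dtype=np.float32)
out = ops.ReLU6()(Tensor(x, mindspore.float32))
assert np.allclose(out.asnumpy(), np.clip(x, 0.0, 6.0))  # min(max(x, 0), 6)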
@@ -1216,54 +1229,6 @@ class InstanceNormV2(Primitive):
         validator.check_bool(is_training, "is_training", self.name)
 
 
-class BNTrainingReduce(Primitive):
-    """
-    The BNTrainingReduce interface is deprecated, please use the :class:`mindspore.ops.BatchNorm` instead.
-
-    Supported Platforms:
-        Deprecated
-    """
-
-    @deprecated("1.5", "ops.BatchNorm", False)
-    @prim_attr_register
-    def __init__(self, data_format="NCHW"):
-        """Initialize BNTrainingReduce."""
-        self.init_prim_io_names(inputs=['x'], outputs=['sum', 'square_sum'])
-        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
-        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
-            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
-                             f"but got the 'data_format' is {self.format} and "
-                             f"the platform is {context.get_context('device_target')}.")
-        self.add_prim_attr('data_format', self.format)
-
-
-class BNTrainingUpdate(Primitive):
-    """
-    The BNTrainingUpdate interface is deprecated, please use the :class:`mindspore.ops.BatchNorm` instead.
-
-    Supported Platforms:
-        Deprecated
-    """
-
-    @deprecated("1.5", "ops.BatchNorm", False)
-    @prim_attr_register
-    def __init__(self, isRef=True, epsilon=1e-5, factor=0.1, data_format="NCHW"):
-        """Initialize BNTrainingUpdate."""
-        self.init_prim_io_names(inputs=['x', 'sum', 'square_sum', 'scale', 'b', 'mean', 'variance'],
-                                outputs=['y', 'running_mean', 'running_variance', 'save_mean', 'save_inv_variance'])
-        validator.check_value_type("isRef", isRef, [bool], self.name)
-        validator.check_value_type("epsilon", epsilon, [float], self.name)
-        validator.check_value_type("factor", factor, [float], self.name)
-        self.epsilon = validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', 'BNTrainingUpdate')
-        self.factor = validator.check_float_range(factor, 0, 1, validator.INC_BOTH, 'factor', 'BNTrainingUpdate')
-        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
-        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
-            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
-                             f"but got the 'data_format' is {self.format} and "
-                             f"the platform is {context.get_context('device_target')}.")
-        self.add_prim_attr('data_format', self.format)
-
-
 class BatchNorm(PrimitiveWithInfer):
     r"""
     Batch Normalization for input data and updated parameters.
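Both deleted primitives had been deprecated since 1.5, and their docstrings already pointed at :class:`mindspore.ops.BatchNorm`. A minimal inference-mode replacement sketch (shapes picked for illustration only):

import mindspore
import numpy as np
from mindspore import Tensor, ops

# NCHW input with 3 channels; scale/bias/mean/variance are per-channel.
x = Tensor(np.ones([2, 3, 4, 4]), mindspore.float32)
scale = Tensor(np.ones([3]), mindspore.float32)
bias = Tensor(np.zeros([3]), mindspore.float32)
mean = Tensor(np.zeros([3]), mindspore.float32)
variance = Tensor(np.ones([3]), mindspore.float32)
batch_norm = ops.BatchNorm(is_training=False, epsilon=1e-5)
y = batch_norm(x, scale, bias, mean, variance)[0]  # first output is the normalized tensor
print(y.shape)  # (2, 3, 4, 4)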
@@ -1400,33 +1365,40 @@ class Conv2D(Primitive):
     2D convolution layer.
 
     Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
-    where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is height, :math:`W` is width,
-    :math:`X_i` is
-    the :math:`i^{th}` input value and :math:`b_i` indicates the deviation value of the :math:`i^{th}` input value.
-    For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
+    where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is feature height, :math:`W` is feature width.
+
+    The output is calculated based on formula:
 
     .. math::
 
-        out_j = \sum_{i=0}^{C_{in} - 1} ccor(W_{ij}, X_i) + b_j,
-
-    where :math:`ccor` is the cross correlation operator, :math:`C_{in}` is the input channel number, :math:`j` ranges
-    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
-    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
-    of kernel and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
-    where :math:`\text{kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the
-    convolution kernel. The full kernel has shape
-    :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
-    where group is the group number to split the input in the channel dimension.
-
-    If the 'pad_mode' is set to be "pad", the output height and width will be
-    :math:`\left \lfloor{1 + \frac{H_{in} + \text{padding[0]} + \text{padding[1]} - \text{kernel_size[0]} -
-    (\text{kernel_size[0]} - 1) \times (\text{dilation[0]} - 1) }{\text{stride[0]}}} \right \rfloor` and
-    :math:`\left \lfloor{1 + \frac{W_{in} + \text{padding[2]} + \text{padding[3]} - \text{kernel_size[1]} -
-    (\text{kernel_size[1]} - 1) \times (\text{dilation[1]} - 1) }{\text{stride[1]}}} \right \rfloor` respectively.
-    Where :math:`dilation` is Spacing between kernel elements, :math:`stride` is The step length of each step,
-    :math:`padding` is zero-padding added to both sides of the input.
-
-    The first introduction can be found in paper `Gradient Based Learning Applied to Document Recognition
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
+
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.
+
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.
+
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.
+
+    The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
+    where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
+
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
     <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
 
     Note:
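One side effect of this rewrite: the explicit 'pad'-mode output-size formula disappears from the docstring (the new text defers to :class:`mindspore.nn.Conv2d`). The removed formula translates directly into a small helper; the names here are ours, not MindSpore's:

import math

def conv2d_pad_out_size(size, pad_before, pad_after, kernel, stride, dilation):
    # floor(1 + (size + pads - kernel - (kernel - 1) * (dilation - 1)) / stride),
    # the "pad"-mode expression deleted above.
    effective_kernel = kernel + (kernel - 1) * (dilation - 1)
    return math.floor(1 + (size + pad_before + pad_after - effective_kernel) / stride)

# Reproduces case 2 of the examples further down: H_in=32, pad=(4, 10), k=3, s=1, d=1.
print(conv2d_pad_out_size(32, 4, 10, 3, 1, 1))  # 44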
@@ -1434,57 +1406,72 @@ class Conv2D(Primitive):
         That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied.
 
     Args:
-        out_channel (int): The number of output channel :math:`C_{out}`.
-        kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the height
-            and width of the 2D convolution window. Single int means the value is for both the height and the width of
-            the kernel. A tuple of 2 ints means the first value is for the height and the other is for the
-            width of the kernel.
-        mode (int): Modes for different convolutions. The value is currently not used. Default: ``1`` .
-        pad_mode (str): Specifies padding mode. The optional values are
-            ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
-
-            - ``"same"``: Adopts the way of completion. The height and width of the output will be equal to
-              the input `x` divided by stride. The padding will be evenly calculated in top and bottom,
-              left and right possiblily.
-              Otherwise, the last extra padding will be calculated from the bottom and the right side.
+        out_channel (int): Specifies output channel :math:`C_{out}`.
+        kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel.
+            It can be a single int or a tuple of 2 integers. A single int means the value is for both the height
+            and the width. A tuple of 2 ints means the first value is for the height and the other is for the width.
+        mode (int, optional): Modes for different convolutions. The value is currently not used. Default: ``1`` .
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
+
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding to is calculated by the operator internally, If the amount is even, it is
+              uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side.
              If this mode is set, `pad` must be 0.
-
-            - ``"valid"``: Adopts the way of discarding. The possible largest height and width of output will be
-              returned without padding. Extra pixels will be discarded. If this mode is set, `pad` must be 0.
-
-            - ``"pad"``: Implicit paddings on both sides of the input `x`. The number of `pad` will be padded to the
-              input Tensor borders. `pad` must be greater than or equal to 0.
-        pad (Union(int, tuple[int])): Implicit paddings on both sides of the input `x`. If `pad` is one integer,
-            the paddings of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple
-            with four integers, the paddings of top, bottom, left and right will be equal to pad[0],
-            pad[1], pad[2], and pad[3] accordingly. Default: ``0`` .
-        stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents
-            the height and width of movement are both strides, or a tuple of two or four int numbers that
-            represent height and width of movement respectively. Default: ``1`` .
-        dilation (Union(int, tuple[int])): The data type is int or a tuple of 2 or 4 integers. Specifies the dilation
-            rate to use for dilated convolution. If set to be :math:`k > 1`, there will
-            be :math:`k - 1` pixels skipped for each sampling location. Its value must
-            be greater than or equal to 1 and bounded by the height and width of the
-            input `x`. Default: ``1`` .
-        group (int): Splits input into groups. Default: ``1`` .
-        data_format (str): The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` . Default: ``"NCHW"`` .
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded. If this mode is set, `pad` must be 0.
+            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
+              in the height and width directions is determined by the `pad` parameter.
+              If this mode is set, `pad` must be greater than or equal to 0.
+
+        pad (Union(int, tuple[int]), optional): Specifies the amount of padding to apply on input
+            when `pad_mode` is set to ``"pad"``. It can be a single int or a tuple of 4 ints.
+            If `pad` is one integer, the paddings of top, bottom, left and right are the same, equal to `pad`.
+            If `pad` is a tuple with four integers, the paddings of top, bottom, left and right will be equal to pad[0],
+            pad[1], pad[2], and pad[3] accordingly. Default: ``0`` .
+        stride (Union(int, tuple[int]), optional): Specifies the stride of the convolution kernel's movement.
+            It can be a single int or a tuple of two or four ints. A single int means the stride is the same in
+            both the height and width directions. A tuple of two ints indicates the strides in the height and
+            width directions, respectively. For a tuple of four ints, the two ints correspond to (N, C) dimension
+            are treated as 1, and the two correspond to (H, W) dimensions is the step size in the height
+            and width directions respectively. Default: ``1`` .
+        dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
+            It can be a single int or a tuple of 2 or 4 integers. A single int means the dilation size is the same
+            in both the height and width directions. A tuple of two ints represents the dilation size in
+            the height and width directions, respectively. For a tuple of four ints, the two ints correspond
+            to (N, C) dimension are treated as 1, and the two correspond to (H, W) dimensions is the
+            dilation size in the height and width directions respectively.
+            Assuming :math:`dilation=(d0, d1)`, the convolutional kernel samples the input with a
+            spacing of :math:`d0-1` elements in the height direction and :math:`d1-1` elements in the width direction.
+            The values in the height and width dimensions are in the ranges [1, H] and [1, W], respectively.
+            Default: ``1`` .
+        group (int, optional): Specifies the number of groups dividing `x`'s input channel when applying
+            group convolution. Default: ``1`` .
+        data_format (str, optional): The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
+            Default: ``"NCHW"`` .
 
     Inputs:
-        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
-        - **weight** (Tensor) - Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`,
-          then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]}, \text{kernel_size[1]})`.
+        - **x** (Tensor) - Input tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` or
+          :math:`(N, H_{in}, W_{in}, C_{in}, )` depending on `data_format` .
+        - **weight** (Tensor) - The convolutional kernel value, it should has shape
+          :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})` .
 
     Outputs:
-        Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
+        Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`
+        or :math:`(N, H_{out}, W_{out}, C_{out}, )`.
+        To see how different pad modes affect the output shape, please refer to
+        :class:`mindspore.nn.Conv2d` for more details.
 
     Raises:
         TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
         TypeError: If `out_channel` or `group` is not an int.
         ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
-        ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'.
+        ValueError: If `pad_mode` is not one of ``'same'``, ``'valid'`` or ``'pad'``.
         ValueError: If `pad` is a tuple whose length is not equal to 4.
-        ValueError: If `pad_mode` it not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0).
-        ValueError: If `data_format` is neither 'NCHW' nor 'NHWC'.
+        ValueError: If `pad_mode` it not equal to ``'pad'`` and `pad` is not equal to ``(0, 0, 0, 0)``.
+        ValueError: If `data_format` is neither ``'NHWC'`` nor ``'NCHW'`` .
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -1493,12 +1480,49 @@ class Conv2D(Primitive):
         >>> import mindspore
         >>> import numpy as np
         >>> from mindspore import Tensor, ops
+        >>> # case 1: All parameters use default values.
         >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
         >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
         >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3)
         >>> output = conv2d(x, weight)
         >>> print(output.shape)
         (10, 32, 30, 30)
+        >>> # case 2: pad_mode="pad", other parameters being default.
+        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, pad_mode="pad", pad=(4, 10, 4, 10))
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 44, 44)
+        >>> # case 3: stride=(2, 4), other parameters being default.
+        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, stride=(2, 4))
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 15, 8)
+        >>> # case 4: dilation=2, other parameters being default.
+        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, dilation=2)
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 28, 28)
+        >>> # case 5: group=2, other parameters being default.
+        >>> x = Tensor(np.ones([10, 64, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, group=2)
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 30, 30)
+        >>> # case 6: All parameters are specified.
+        >>> x = Tensor(np.ones([10, 64, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, pad_mode="pad",
+        ...                     pad=(4, 10, 4, 10), stride=(2, 4), dilation=2, group=2)
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 21, 11)
     """
 
     @prim_attr_register
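The printed shapes in the new example cases all follow from standard convolution arithmetic; for 'valid' mode the output size is floor((size - effective_kernel) / stride) + 1. A quick check of cases 1, 3 and 4 (hypothetical helper, assuming that formula):

import math

def conv2d_valid_out_size(size, kernel, stride, dilation=1):
    # "valid": largest output without padding; leftover pixels are discarded.
    effective_kernel = kernel + (kernel - 1) * (dilation - 1)
    return math.floor((size - effective_kernel) / stride) + 1

print(conv2d_valid_out_size(32, 3, 1))                                   # case 1: 30
print(conv2d_valid_out_size(32, 3, 2), conv2d_valid_out_size(32, 3, 4))  # case 3: 15 8
print(conv2d_valid_out_size(32, 3, 1, dilation=2))                       # case 4: 28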
@@ -1779,8 +1803,13 @@ class _Pool(PrimitiveWithInfer):
             out_w = math.ceil(input_w / stride_w)
         out_shape = [batch, channel, out_h, out_w] if self.format == "NCHW" else [batch, out_h, out_w, channel]
 
-        for shape_value in out_shape:
-            if shape_value <= 0 and shape_value != -1:
+        is_dynamic_shape = False
+        for in_shape_val in x_shape_norm:
+            if in_shape_val == -1:
+                is_dynamic_shape = True
+
+        for out_shape_val in out_shape:
+            if out_shape_val <= 0 and not is_dynamic_shape:
                 raise ValueError(f"For '{self.name}', the each element of the output shape must be larger than 0, "
                                  f"but got output shape: {out_shape}. The input shape: {x_shape}, "
                                  f"kernel size: {self.kernel_size}, strides: {self.strides}."
@@ -1814,22 +1843,26 @@ class MaxPool(_Pool):
         strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
             not only the height of movement but also the width of movement, or a tuple of two int numbers that
             represent height and width of movement respectively. Default: ``1`` .
-        pad_mode (str): The optional value of pad mode is ``"same"`` or ``"valid"`` .
-            Default: ``"valid"`` .
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
 
-            - ``"same"``: Adopts the way of completion. The height and width of the output will be the same
-              as the input. The total number of padding will be calculated in horizontal and vertical
-              directions and evenly distributed to top, bottom, left and right if possible.
-              Otherwise, the last extra padding will be done from the bottom and the right side.
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding to is calculated by the operator internally, If the amount is even, it is
+              uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded.
 
-            - ``"valid"``: Adopts the way of discarding. The possible largest height and width of output
-              will be returned without padding. Extra pixels will be discarded.
         data_format (str) : The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
             Default: ``'NCHW'`` .
 
     Inputs:
         - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
-          Supported dtypes: float16, float32, float64.
+          Supported dtypes:
+
+          - CPU: float16, float32, float64.
+          - GPU/Ascend: float16, float32.
 
     Outputs:
         Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
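The MaxPool primitive's call signature is untouched by the docstring rework; a minimal example on the float32 path supported by all three backends (illustrative, not part of the diff):

import mindspore
import numpy as np
from mindspore import Tensor, ops

x = Tensor(np.arange(1 * 2 * 4 * 4).reshape((1, 2, 4, 4)), mindspore.float32)
max_pool = ops.MaxPool(kernel_size=2, strides=2, pad_mode="valid")
output = max_pool(x)
print(output.shape)  # (1, 2, 2, 2)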
@@ -1887,16 +1920,17 @@ class MaxPoolV1(Primitive):
         strides (Union[int, tuple[int]]): The distance of kernel moving, an integer that represents
             the height and width of movement are both strides, or a tuple of two integers that
             represent height and width of movement, respectively. Default: ``1`` .
-        pad_mode (str): The optional value for pad mode, is ``"same"`` or ``"valid"`` .
-            Default: ``"valid"`` .
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
 
-            - ``"same"``: Adopts the way of completion. The height and width of the output will be the same
-              as the input. The number of padding will be calculated in horizontal and vertical
-              directions, and evenly distributed to top and bottom, left and right if possible.
-              Otherwise, the extra padding will be done from the bottom and the right side.
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding to is calculated by the operator internally, If the amount is even, it is
+              uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded.
 
-            - ``"valid"``: Adopts the way of discarding. The possible largest height and width of the
-              output will be returned without padding. Extra pixels will be discarded.
         data_format (str) : The optional value for data format, is ``'NCHW'`` or ``'NHWC'`` .
             Default: ``'NCHW'`` .
 
@@ -1957,55 +1991,6 @@
         self.add_prim_attr("strides", strides_adapted)
 
 
-class MaxPoolWithArgmax(Primitive):
-    r"""
-    :class:`mindspore.ops.MaxPoolWithArgmax` is deprecated from version 2.0 and will be removed in a future version,
-    use :class:`mindspore.ops.MaxPoolWithArgmaxV2` instead.
-
-    Supported Platforms:
-        Deprecated
-
-    Examples:
-        >>> import mindspore
-        >>> import numpy as np
-        >>> from mindspore import Tensor, ops
-        >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
-        >>> maxpool_arg_op = ops.MaxPoolWithArgmax(pad_mode="VALID", kernel_size=2, strides=1)
-        >>> output_tensor, argmax = maxpool_arg_op(x)
-        >>> print(output_tensor)
-        [[[[ 5.  6.  7.]
-           [ 9. 10. 11.]]
-          [[17. 18. 19.]
-           [21. 22. 23.]]
-          [[29. 30. 31.]
-           [33. 34. 35.]]]]
-    """
-
-    @deprecated("2.0", "ops.MaxPoolWithArgmaxV2", False)
-    @prim_attr_register
-    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
-        """Initialize MaxPoolWithArgmax."""
-        self.init_prim_io_names(inputs=['x'], outputs=['output', 'mask'])
-        validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
-        validator.check_value_type('strides', strides, [int, tuple], self.name)
-        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
-        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name)
-        self.add_prim_attr("pad_mode", self.pad_mode)
-        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
-        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
-            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
-                             f"but got the 'data_format' is {self.format} and "
-                             f"the platform is {context.get_context('device_target')}.")
-        self.kernel_size = _check_positive_int_or_tuple(
-            "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True)
-        self.kernel_size = (1, self.kernel_size[-2], self.kernel_size[-1], 1)
-        self.add_prim_attr("kernel_size", self.kernel_size)
-
-        self.strides = _check_positive_int_or_tuple("strides", strides, self.name, allow_four=False, ret_four=True)
-        self.strides = (1, self.strides[-2], self.strides[-1], 1)
-        self.add_prim_attr("strides", self.strides)
-
-
 class MaxPool3D(Primitive):
     r"""
     Applies a 3D max pooling over an input Tensor which can be regarded as a composition of 3D planes.
@@ -2026,19 +2011,21 @@ class MaxPool3D(Primitive):
2026
2011
  strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
2027
2012
  the depth, height and width of movement, or a tuple of three int numbers that
2028
2013
  represent depth, height and width of movement respectively. Default: ``1`` .
2029
- pad_mode (str): The optional value of pad mode is ``"SAME"`` , ``"VALID"`` or ``"PAD"`` .
2030
- Default: ``"VALID"`` .
2031
-
2032
- - ``"SAME"``: Adopts the way of completion. The height and width of the output will be the same
2033
- as the input. The total number of padding will be calculated in horizontal and vertical
2034
- directions and evenly distributed to top, bottom, left and right if possible.
2035
- Otherwise, the last extra padding will be done from the bottom and the right side.
2036
-
2037
- - ``"VALID"``: Adopts the way of discarding. The possible largest height and width of output
2038
- will be returned without padding. Extra pixels will be discarded.
2039
-
2040
- - ``"PAD"``: Implicit paddings on both sides of the input in depth, height and width. The number of
2041
- ``"PAD"`` will be padded to the input Tensor borders. "pad_list" must be greater than or equal to 0.
2014
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
2015
+ ``"SAME"`` , ``"VALID"`` or ``"PAD"`` . Default: ``"VALID"`` .
2016
+
2017
+ - ``"SAME"``: Pad the input around its depth/height/width dimension so that the shape of input and output
2018
+ are the same when `stride` is set to ``1``.
2019
+ The amount of padding is calculated by the operator internally. If the amount is even,
2020
+ it is uniformly distributed around the input; if it is odd, the excess amount goes
2021
+ to the front/right/bottom side.
2022
+ If this mode is set, `pad_list` must be 0.
2023
+ - ``"VALID"``: No padding is applied to the input, and the output returns the maximum
2024
+ possible depth, height and width. Extra pixels that could not complete a full stride will
2025
+ be discarded. If this mode is set, `pad_list` must be 0.
2026
+ - ``"PAD"``: Pad the input with a specified amount. In this mode, the amount of padding
2027
+ in the depth, height and width dimension is determined by the `pad_list` parameter.
2028
+ If this mode is set, `pad_list` must be greater than or equal to 0.
2042
2029
 
2043
2030
  pad_list (Union(int, tuple[int])): The pad value to be filled. Default: ``0`` . If `pad_list` is an integer, the
2044
2031
  paddings of head, tail, top, bottom, left and right are the same, equal to `pad_list`. If `pad_list` is a tuple of six
@@ -2347,14 +2334,17 @@ class AvgPool(Primitive):
2347
2334
  strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
2348
2335
  the height and width of movement are both strides, or a tuple of two int numbers that
2349
2336
  represent height and width of movement respectively. Default: ``1`` .
2350
- pad_mode (str, optional): The optional value for pad mode, is ``'same'`` or ``'valid'`` .
2351
- Default: ``'valid'`` .
2337
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
2338
+ ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
2352
2339
 
2353
- - ``'same'``: The height and width of the output are the same as the input divided by 'strides'
2354
- and rounded up.
2340
+ - ``"same"``: Pad the input around its edges so that the shape of input and output
2341
+ are the same when `stride` is set to ``1``.
2342
+ The amount of padding is calculated by the operator internally. If the amount is even, it is
2343
+ uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
2344
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
2345
+ possible height and width. Extra pixels that could not complete a full stride will
2346
+ be discarded.
2355
2347
 
2356
- - ``'valid'``: Returns the output of the valid calculation without filling. Redundant pixels that
2357
- do not satisfy the calculation will be discarded.
2358
2348
  data_format (str, optional): The format of input and output data. It should be ``'NHWC'`` or ``'NCHW'`` .
2359
2349
  Default: ``'NCHW'`` .
2360
2350
 
@@ -2451,16 +2441,17 @@ class AvgPoolV1(Primitive):
2451
2441
  strides (Union[int, tuple[int]]): The distance of kernel moving, an integer that represents
2452
2442
  the height and width of movement are both strides, or a tuple of two integers that
2453
2443
  represent height and width of movement, respectively. Default: ``1`` .
2454
- pad_mode (str): The optional value for pad mode, should be one of ``"same"`` or ``"valid"`` .
2455
- Default: ``"valid"`` .
2444
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
2445
+ ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
2456
2446
 
2457
- - ``"same"``: Adopts the way of completion. The height and width of output will be the same as
2458
- the input. The total number of padding will be calculated horizontally and vertically,
2459
- and evenly distributed to top and bottom, left and right if possible.
2460
- Otherwise, the last extra padding will be done from bottom and right.
2447
+ - ``"same"``: Pad the input around its edges so that the shape of input and output
2448
+ are the same when `stride` is set to ``1``.
2449
+ The amount of padding is calculated by the operator internally. If the amount is even, it is
2450
+ uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
2451
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
2452
+ possible height and width. Extra pixels that could not complete a full stride will
2453
+ be discarded.
2461
2454
 
2462
- - ``"valid"``: Adopts the way of discarding. The largest possible height and width of output
2463
- will be returned without padding. Extra pixels will be discarded.
2464
2455
  data_format (str): The format of input and output data. Should be ``'NHWC'`` or ``'NCHW'`` .
2465
2456
  Default: ``'NCHW'`` .
2466
2457
 
@@ -2708,8 +2699,21 @@ class Conv2DTranspose(Conv2DBackpropInput):
2708
2699
  Args:
2709
2700
  out_channel (int): The dimensionality of the output space.
2710
2701
  kernel_size (Union[int, tuple[int]]): The size of the convolution window.
2711
- pad_mode (str): Modes to fill padding. It could be ``"valid"`` , ``"same"`` , or ``"pad"`` .
2712
- Default: ``"valid"`` .
2702
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
2703
+ ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
2704
+
2705
+ - ``"same"``: Pad the input around its edges so that the shape of input and output
2706
+ are the same when `stride` is set to ``1``.
2707
+ The amount of padding is calculated by the operator internally. If the amount is even, it is
2708
+ uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
2709
+ If this mode is set, `pad` must be 0.
2710
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
2711
+ possible height and width. Extra pixels that could not complete a full stride will
2712
+ be discarded. If this mode is set, `pad` must be 0.
2713
+ - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
2714
+ in the height and width directions is determined by the `pad` parameter.
2715
+ If this mode is set, `pad` must be greater than or equal to 0.
2716
+
2713
2717
  Please refer to :class:`mindspore.nn.Conv2dTranspose` for more specifications about `pad_mode`.
2714
2718
  pad (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the paddings
2715
2719
  of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers,
@@ -2779,9 +2783,13 @@ class BiasAdd(Primitive):
2779
2783
  Default: ``"NCHW"`` .
2780
2784
 
2781
2785
  Inputs:
2782
- - **input_x** (Tensor) - The input tensor. The shape can be 2-5 dimensions.
2786
+ - **input_x** (Tensor) - The input tensor. The shape can be 2-5 dimensions. Supported dtypes:
2787
+
2788
+ - Ascend/CPU: all Number types.
2789
+ - GPU: float16, float32, int8.
2790
+
2783
2791
  - **bias** (Tensor) - The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
2784
- `input_x`.
2792
+ `input_x`. It has the same type as `input_x`.
2785
2793
 
2786
2794
  Outputs:
2787
2795
  Tensor, with the same shape and data type as `input_x`.
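For intuition, the NCHW case is equivalent to a NumPy broadcast over the channel dimension (a sketch, not the operator's implementation):

>>> import numpy as np
>>> x = np.ones((2, 3, 4, 4), dtype=np.float32)         # NCHW input
>>> bias = np.array([0.1, 0.2, 0.3], dtype=np.float32)  # one value per channel
>>> y = x + bias.reshape(1, -1, 1, 1)                   # broadcast over N, H, W
>>> y.shape
(2, 3, 4, 4)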
@@ -2790,7 +2798,7 @@ class BiasAdd(Primitive):
2790
2798
  TypeError: If `data_format` is not a str.
2791
2799
  ValueError: If value of `data_format` is not in the range of ['NHWC','NCHW','NCDHW'].
2792
2800
  TypeError: If `input_x` or `bias` is not a Tensor.
2793
- TypeError: If dtype of `input_x` or `bias` is inconsistent.
2801
+ TypeError: If dtypes of `input_x` and `bias` are inconsistent.
2794
2802
  TypeError: If dimension of `input_x` is not in the range [2, 5].
2795
2803
 
2796
2804
  Supported Platforms:
@@ -2820,7 +2828,7 @@ class NLLLoss(Primitive):
2820
2828
  r"""
2821
2829
  Gets the negative log likelihood loss between logits and labels.
2822
2830
 
2823
- The nll loss with reduction=none can be described as:
2831
+ The nll loss with :math:`reduction = none` can be described as:
2824
2832
 
2825
2833
  .. math::
2826
2834
 
@@ -2831,7 +2839,7 @@ class NLLLoss(Primitive):
2831
2839
  where :math:`x` is the logits, :math:`t` is the labels, :math:`w` is the weight,
2832
2840
  N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
2833
2841
 
2834
- If reduction is not ``'none'`` (default ``'mean'`` ), then
2842
+ If :math:`reduction \neq none` (default ``'mean'`` ), then
2835
2843
 
2836
2844
  .. math::
2837
2845
 
@@ -2841,8 +2849,13 @@ class NLLLoss(Primitive):
2841
2849
  \end{array}\right.
2842
2850
 
2843
2851
  Args:
2844
- reduction (str): Apply specific reduction method to the output: ``"none"`` , ``"mean"`` , or ``"sum"`` .
2845
- Default: ``"mean"`` .
2852
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2853
+ ``'sum'`` . Default: ``'mean'`` .
2854
+
2855
+ - ``'none'``: no reduction will be applied.
2856
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
2857
+ - ``'sum'``: the output elements will be summed.
2858
+
2846
2859
  ignore_index (int): Specifies a target value that is ignored
2847
2860
  and does not contribute to the input gradient. Default: ``-100`` .
2848
2861
 
@@ -2856,8 +2869,9 @@ class NLLLoss(Primitive):
2856
2869
  Outputs:
2857
2870
  Tuple of 2 tensors composed with `loss` and `total_weight`.
2858
2871
 
2859
- - **loss** (Tensor) - When `reduction` is 'none' and `logits` is a 2D tensor, the `loss` shape is :math:`(N,)`.
2860
- Otherwise, the `loss` is a scalar. The data type is the same with `input's`.
2872
+ - **loss** (Tensor) - When `reduction` is ``'none'`` and `logits` is a 2D tensor,
2873
+ the `loss` shape is :math:`(N,)`. Otherwise, the `loss` is a scalar.
2874
+ The data type is the same as that of `logits`.
2861
2875
  - **total_weight** (Tensor) - The `total_weight` is a scalar. The data type is the same as that of `weight`.
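A NumPy sketch of the formulas above for the 2D case with unit weights (illustrative only, not the operator itself):

>>> import numpy as np
>>> logits = np.log(np.array([[0.7, 0.2, 0.1],
...                           [0.1, 0.8, 0.1]], dtype=np.float32))
>>> labels = np.array([0, 1])
>>> weight = np.ones(3, dtype=np.float32)
>>> per_sample = -weight[labels] * logits[np.arange(2), labels]
>>> total_weight = weight[labels].sum()
>>> round(float(per_sample.sum() / total_weight), 4)   # 'mean' reduction
0.2899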
2862
2876
 
2863
2877
  Raises:
@@ -3155,6 +3169,10 @@ class SmoothL1Loss(Primitive):
3155
3169
  reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3156
3170
  ``'sum'`` . Default: ``'none'`` .
3157
3171
 
3172
+ - ``'none'``: no reduction will be applied.
3173
+ - ``'mean'``: compute and return the mean of elements in the output.
3174
+ - ``'sum'``: the output elements will be summed.
3175
+
3158
3176
  Inputs:
3159
3177
  - **logits** (Tensor) - Input Tensor of any dimension. Data type must be float16, float32 or float64.
3160
3178
  - **labels** (Tensor) - Ground truth data, has the same shape and dtype as the `logits`.
@@ -3202,12 +3220,12 @@ class MultiMarginLoss(Primitive):
3202
3220
  Args:
3203
3221
  p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: ``1`` .
3204
3222
  margin (int, optional): A parameter to change pairwise distance. Default: ``1.0`` .
3205
- reduction (str, optional): Apply specific reduction method to the output: ``"none"`` ,
3206
- ``"mean"`` , ``"sum"`` . Default: ``"mean"`` .
3223
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3224
+ ``'sum'`` . Default: ``'mean'`` .
3207
3225
 
3208
- - ``"none"``: no reduction will be applied.
3209
- - ``"mean"``: the sum of the output will be divided by the number of elements in the output.
3210
- - ``"sum"``: the output will be summed.
3226
+ - ``'none'``: no reduction will be applied.
3227
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
3228
+ - ``'sum'``: the output elements will be summed.
3211
3229
 
3212
3230
  Inputs:
3213
3231
  - **inputs** (Tensor) - Input, with shape :math:`(N, C)`. Data type only supports float32, float16
@@ -3218,7 +3236,7 @@ class MultiMarginLoss(Primitive):
3218
3236
  support float16, float32 or float64.
3219
3237
 
3220
3238
  Outputs:
3221
- Tensor, When `reduction` is 'none', the shape is :math:`(N,)`.
3239
+ Tensor, when `reduction` is ``'none'``, the shape is :math:`(N,)`.
3222
3240
  Otherwise, it is a scalar. Has the same data type as `inputs`.
3223
3241
 
3224
3242
  Supported Platforms:
@@ -3261,15 +3279,19 @@ class SoftMarginLoss(Primitive):
3261
3279
  where :math:`x.nelement()` is the number of elements of x.
3262
3280
 
3263
3281
  Args:
3264
- reduction (str): Apply specific reduction method to the output: ``"none"`` , ``"mean"`` or ``"sum"`` .
3265
- Default: ``"mean"`` .
3282
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
3283
+ ``'sum'`` . Default: ``'mean'`` .
3284
+
3285
+ - ``'none'``: no reduction will be applied.
3286
+ - ``'mean'``: compute and return the mean of elements in the output.
3287
+ - ``'sum'``: the output elements will be summed.
3266
3288
 
3267
3289
  Inputs:
3268
3290
  - **logits** (Tensor) - Predict data. Data type must be float16 or float32.
3269
3291
  - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`.
3270
3292
 
3271
3293
  Outputs:
3272
- Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`.
3294
+ Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `logits`.
3273
3295
  Otherwise, a scalar value will be returned.
3274
3296
 
3275
3297
  Raises:
@@ -3736,26 +3758,28 @@ class LayerNorm(Primitive):
3736
3758
 
3737
3759
  Args:
3738
3760
  begin_norm_axis (int): The begin axis of the `input_x` to apply LayerNorm,
3739
- the value must be in [-1, rank(input)). Default: ``1`` .
3761
+ the value must be in [-1, rank(input_x)). Default: ``1`` .
3740
3762
  begin_params_axis (int): The begin axis of the parameter input (`gamma`, `beta`) to
3741
- apply LayerNorm, the value must be in [-1, rank(input)). Default: ``1`` .
3742
- epsilon (float): A value added to the denominator for numerical stability. Default: ``1e-7`` .
3763
+ apply LayerNorm, the value must be in [-1, rank(input_x)). Default: ``1`` .
3764
+ epsilon (float): A value added to the denominator for numerical stability (:math:`\epsilon`). Default: ``1e-7`` .
3743
3765
 
3744
3766
  Inputs:
3745
3767
  - **input_x** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
3746
3768
  The input of LayerNorm. Supported dtypes: float16, float32, float64.
3747
- - **gamma** (Tensor) - Tensor of shape :math:`(P_0, \ldots, P_\text{begin_params_axis})`.
3769
+ - **gamma** (Tensor) - Tensor of shape :math:`(P_\text{begin_params_axis}, \ldots, P_\text{rank(input_x)-1})`.
3748
3770
  The learnable parameter :math:`\gamma` as the scale on norm. Supported dtypes: float16, float32, float64.
3749
- - **beta** (Tensor) - Tensor of shape :math:`(P_0, \ldots, P_\text{begin_params_axis})`.
3771
+ - **beta** (Tensor) - Tensor of shape :math:`(P_\text{begin_params_axis}, \ldots, P_\text{rank(input_x)-1})`.
3750
3772
  The learnable parameter :math:`\beta` as the offset on norm. Supported dtypes: float16, float32, float64.
3751
3773
 
3752
3774
  Outputs:
3753
3775
  tuple[Tensor], tuple of 3 tensors, the normalized input and the updated parameters.
3754
3776
 
3755
3777
  - **output_x** (Tensor) - The normalized input, has the same type and shape as the `input_x`.
3756
- The shape is :math:`(N, C)`.
3757
- - **mean** (Tensor) - Tensor of shape :math:`(C,)`.
3758
- - **variance** (Tensor) - Tensor of shape :math:`(C,)`.
3778
+ - **mean** (Tensor) - The first `begin_norm_axis` dimensions of the `mean` shape are the same as those
3779
+ of `input_x`, and the remaining dimensions are 1. Suppose the shape of `input_x` is :math:`(x_1, x_2, \ldots, x_R)`,
3780
+ then the shape of `mean` is :math:`(x_1, \ldots, x_{begin\_norm\_axis}, 1, \ldots, 1)`
3781
+ (when `begin_norm_axis=0`, the shape of `mean` is :math:`(1, \ldots, 1)` ).
3782
+ - **variance** (Tensor) - Shape is the same as `mean` .
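The mean/variance shapes above follow from reducing over the trailing axes with keepdims; a NumPy sketch of the statistics only (gamma/beta omitted):

>>> import numpy as np
>>> x = np.random.randn(2, 3, 4).astype(np.float32)
>>> begin_norm_axis = 1
>>> axes = tuple(range(begin_norm_axis, x.ndim))
>>> mean = x.mean(axis=axes, keepdims=True)
>>> variance = x.var(axis=axes, keepdims=True)
>>> mean.shape, variance.shape
((2, 1, 1), (2, 1, 1))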
3759
3783
 
3760
3784
  Raises:
3761
3785
  TypeError: If `begin_norm_axis` or `begin_params_axis` is not an int.
@@ -3855,38 +3879,6 @@ class L2Normalize(Primitive):
3855
3879
  self.axis = axis
3856
3880
 
3857
3881
 
3858
- class DropoutGenMask(Primitive):
3859
- """
3860
- The DropoutGenMask interface is deprecated, please use the :class:`mindspore.ops.Dropout` instead.
3861
-
3862
- Supported Platforms:
3863
- Deprecated
3864
- """
3865
-
3866
- @deprecated("1.5", "ops.Dropout", False)
3867
- @prim_attr_register
3868
- def __init__(self, Seed0=0, Seed1=0):
3869
- """Initialize DropoutGenMask."""
3870
- self.init_prim_io_names(inputs=['shape', 'keep_prob'], outputs=['output'])
3871
- validator.check_value_type("Seed0", Seed0, [int], self.name)
3872
- validator.check_value_type("Seed1", Seed1, [int], self.name)
3873
- self.add_prim_attr("side_effect_hidden", True)
3874
-
3875
-
3876
- class DropoutDoMask(Primitive):
3877
- """
3878
- The DropoutDoMask interface is deprecated, please use the :class:`mindspore.ops.Dropout` instead.
3879
-
3880
- Supported Platforms:
3881
- Deprecated
3882
- """
3883
-
3884
- @deprecated("1.5", "ops.Dropout", False)
3885
- @prim_attr_register
3886
- def __init__(self):
3887
- pass
3888
-
3889
-
3890
3882
  class ResizeBilinear(PrimitiveWithInfer):
3891
3883
  r"""
3892
3884
  This API is deprecated, please use the :class:`mindspore.ops.ResizeBilinearV2` instead.
@@ -4017,6 +4009,7 @@ class OneHot(Primitive):
4017
4009
 
4018
4010
  Note:
4019
4011
  If the input indices is rank `N`, the output will have rank `N+1`. The new axis is created at dimension `axis`.
4012
+ On Ascend, if `on_value` is of int64 dtype, `indices` must also be of int64 dtype.
4020
4013
 
4021
4014
  Args:
4022
4015
  axis (int): Position to insert the value. e.g. If shape of `indices` is :math:`(N, C)`, and `axis` is -1,
@@ -4025,18 +4018,20 @@ class OneHot(Primitive):
4025
4018
 
4026
4019
  Inputs:
4027
4020
  - **indices** (Tensor) - A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
4028
- Data type must be uint8, int32 or int64.
4021
+ Data type must be int32 or int64.
4029
4022
  - **depth** (int) - A scalar defining the depth of the one-hot dimension.
4030
- - **on_value** (Tensor) - A value to fill in output when `indices[j] = i`.
4023
+ - **on_value** (Tensor) - A value to fill in output when `indices[j] = i`. Data type must be int32, int64,
4024
+ float16 or float32.
4031
4025
  - **off_value** (Tensor) - A value to fill in output when `indices[j] != i`.
4032
4026
  It has the same data type as `on_value`.
4033
4027
 
4034
4028
  Outputs:
4035
- Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`.
4029
+ Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`,
4030
+ and it has the same data type as `on_value`.
4036
4031
 
4037
4032
  Raises:
4038
4033
  TypeError: If `axis` or `depth` is not an int.
4039
- TypeError: If dtype of `indices` is not uint8, int32 or int64.
4034
+ TypeError: If dtype of `indices` is not int32 or int64.
4040
4035
  TypeError: If `indices`, `on_value` or `off_value` is not a Tensor.
4041
4036
  ValueError: If `axis` is not in range [-1, len(indices_shape)].
4042
4037
  ValueError: If `depth` is less than 0.
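For `axis=-1` the result can be reproduced with the following NumPy construction (a sketch, not the primitive itself):

>>> import numpy as np
>>> indices = np.array([0, 1, 2])
>>> depth, on_value, off_value = 3, 1.0, 0.0
>>> out = np.full((indices.size, depth), off_value, dtype=np.float32)
>>> out[np.arange(indices.size), indices] = on_value
>>> out
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)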
@@ -4065,26 +4060,6 @@ class OneHot(Primitive):
4065
4060
  validator.check_value_type("axis", axis, [int], self.name)
4066
4061
 
4067
4062
 
4068
- class Gelu(PrimitiveWithInfer):
4069
- """
4070
- Same as operator GeLU. Gelu will be deprecated in the future.
4071
- Please use GeLU instead.
4072
- """
4073
-
4074
- @deprecated("1.1", "GeLU", True)
4075
- @prim_attr_register
4076
- def __init__(self):
4077
- """Initialize Gelu"""
4078
- self.init_prim_io_names(inputs=['x'], outputs=['output'])
4079
-
4080
- def infer_shape(self, input_x):
4081
- return input_x
4082
-
4083
- def infer_dtype(self, input_x):
4084
- validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name)
4085
- return input_x
4086
-
4087
-
4088
4063
  class GeLU(Primitive):
4089
4064
  r"""
4090
4065
  Gaussian Error Linear Units activation function.
@@ -4131,26 +4106,6 @@ class GeLU(Primitive):
4131
4106
  self.init_prim_io_names(inputs=['x'], outputs=['output'])
4132
4107
 
4133
4108
 
4134
- class FastGelu(PrimitiveWithInfer):
4135
- """
4136
- Same as operator FastGeLU. FastGelu will be deprecated in the future.
4137
- Please use FastGeLU instead.
4138
- """
4139
-
4140
- @deprecated("1.1", "FastGeLU", True)
4141
- @prim_attr_register
4142
- def __init__(self):
4143
- """Initialize FastGelu."""
4144
- self.init_prim_io_names(inputs=['x'], outputs=['output'])
4145
-
4146
- def infer_shape(self, input_x):
4147
- return input_x
4148
-
4149
- def infer_dtype(self, input_x):
4150
- validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name)
4151
- return input_x
4152
-
4153
-
4154
4109
  class FastGeLU(Primitive):
4155
4110
  r"""
4156
4111
  Fast Gaussian Error Linear Units activation function.
@@ -4301,19 +4256,24 @@ class LSTM(Primitive):
4301
4256
  bidirectional (bool): Specifies whether it is a bidirectional LSTM.
4302
4257
  dropout (float): If not 0, append `Dropout` layer on the outputs of each
4303
4258
  LSTM layer except the last layer. The range of dropout is [0.0, 1.0].
4259
+ proj_size (int): If `proj_size` > 0, a projection of the corresponding size will be used,
4260
+ which is currently only supported on CPU. Default: ``0`` .
4304
4261
 
4305
4262
  Inputs:
4306
4263
  - **input** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, input\_size)` or
4307
4264
  :math:`(batch\_size, seq\_len, input\_size)`.
4308
- - **h** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
4265
+ - **h** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, real\_hidden\_size)`.
4309
4266
  - **c** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
4310
4267
  - **w** (Tensor) - A weight Tensor.
4311
4268
 
4269
+ If :math:`proj\_size > 0` , :math:`real\_hidden\_size = proj\_size` , otherwise
4270
+ :math:`real\_hidden\_size = hidden\_size` .
4271
+
4312
4272
  Outputs:
4313
- Tuple, a tuple contains (`output`, `h_n`, `c_n`, `reserve`, `state`).
4273
+ Tuple, a tuple containing `(output, h_n, c_n, reserve, state)`.
4314
4274
 
4315
- - **output** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)`.
4316
- - **h_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
4275
+ - **output** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * real\_hidden\_size)`.
4276
+ - **h_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, real\_hidden\_size)`.
4317
4277
  - **c_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
4318
4278
  - **reserve** (Tensor) - Tensor of shape :math:`(r, 1)`.
4319
4279
  - **state** (Tensor) - Random number generator state and its shape is :math:`(s, 1)`.
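The shape bookkeeping for `proj_size` can be spelled out directly (plain Python, values chosen arbitrarily for illustration):

>>> num_layers, num_directions, batch_size = 2, 1, 8
>>> hidden_size, proj_size = 16, 4
>>> real_hidden_size = proj_size if proj_size > 0 else hidden_size
>>> h_shape = (num_directions * num_layers, batch_size, real_hidden_size)
>>> c_shape = (num_directions * num_layers, batch_size, hidden_size)
>>> h_shape, c_shape
((2, 8, 4), (2, 8, 16))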
@@ -4323,6 +4283,7 @@ class LSTM(Primitive):
4323
4283
  TypeError: If `has_bias` or `bidirectional` is not a bool.
4324
4284
  TypeError: If `dropout` is not a float.
4325
4285
  ValueError: If `dropout` is not in range [0.0, 1.0].
4286
+ ValueError: If `proj_size` is not in range [0, `hidden_size`).
4326
4287
 
4327
4288
  Supported Platforms:
4328
4289
  ``GPU`` ``CPU``
@@ -4356,10 +4317,12 @@ class LSTM(Primitive):
4356
4317
  """
4357
4318
 
4358
4319
  @prim_attr_register
4359
- def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
4320
+ def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout, proj_size=0):
4360
4321
  """Initialize LSTM."""
4361
4322
  self.input_size = validator.check_positive_int(input_size, "input_size", self.name)
4362
4323
  self.hidden_size = validator.check_positive_int(hidden_size, "hidden_size", self.name)
4324
+ self.proj_size = validator.check_int_range(proj_size, 0, hidden_size, validator.INC_LEFT,
4325
+ 'proj_size', self.name)
4363
4326
  self.num_layers = validator.check_positive_int(num_layers, "num_layers", self.name)
4364
4327
  self.has_bias = validator.check_value_type("has_bias", has_bias, (bool,), self.name)
4365
4328
  self.bidirectional = validator.check_value_type("bidirectional", bidirectional, (bool,), self.name)
@@ -4466,8 +4429,12 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
4466
4429
  :math:`P_c>1` increases the recall, :math:`P_c<1` increases the precision.
4467
4430
 
4468
4431
  Args:
4469
- reduction (str): Type of reduction to be applied to loss. The optional values are ``'mean'`` , ``'sum'`` , and
4470
- ``'none'`` , not case sensitive. If ``'none'`` , do not perform reduction. Default: ``'mean'`` .
4432
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
4433
+ ``'sum'`` . Default: ``'mean'`` .
4434
+
4435
+ - ``'none'``: no reduction will be applied.
4436
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
4437
+ - ``'sum'``: the output elements will be summed.
4471
4438
 
4472
4439
  Inputs:
4473
4440
  - **logits** (Tensor) - Input logits. Data type must be float16 or float32.
@@ -4481,7 +4448,7 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
4481
4448
  Data type must be float16 or float32.
4482
4449
 
4483
4450
  Outputs:
4484
- Tensor or Scalar, if `reduction` is 'none', it's a tensor with the same shape and type as input `logits`.
4451
+ Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
4485
4452
  Otherwise, the output is a scalar.
4486
4453
 
4487
4454
  Raises:
@@ -4489,7 +4456,7 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
4489
4456
  TypeError: If data type of any input is neither float16 nor float32.
4490
4457
  TypeError: If data type of `reduction` is not string.
4491
4458
  ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
4492
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
4459
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
4493
4460
 
4494
4461
  Supported Platforms:
4495
4462
  ``Ascend`` ``GPU`` ``CPU``
@@ -4669,9 +4636,15 @@ class MirrorPad(Primitive):
4669
4636
  Pads the input tensor according to the paddings and mode.
4670
4637
 
4671
4638
  Args:
4672
- mode (str): Specifies the padding mode. The optional values are ``'REFLECT'`` and ``'SYMMETRIC'`` .
4639
+ mode (str, optional): Specifies the padding method.
4640
+ The optional values are ``'REFLECT'`` and ``'SYMMETRIC'`` .
4673
4641
  Default: ``'REFLECT'`` .
4674
4642
 
4643
+ - ``'REFLECT'``: Reflect the values across the edge, excluding the edge value itself.
4644
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides results in [3, 2, 1, 2, 3, 4, 3, 2].
4645
+ - ``'SYMMETRIC'``: Reflect the values across the edge, including the edge value itself.
4646
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides results in [2, 1, 1, 2, 3, 4, 4, 3].
4647
+
4675
4648
  Inputs:
4676
4649
  - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
4677
4650
  additional dimensions.
@@ -4683,15 +4656,14 @@ class MirrorPad(Primitive):
4683
4656
  paddings[D, 0] and paddings[D, 1] must be no greater than input_x.dim_size(D)
4684
4657
  (or input_x.dim_size(D) - 1) if mode is SYMMETRIC (if REFLECT, respectively).
4685
4658
 
4686
-
4687
4659
  Outputs:
4688
4660
  Tensor, the tensor after padding.
4689
4661
 
4690
- - If `mode` is "REFLECT", it uses a way of symmetrical copying through the axis of symmetry to fill in.
4662
+ - If `mode` is ``'REFLECT'``, it fills by mirroring the values across the border, excluding the border itself.
4691
4663
  If the `input_x` is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the
4692
4664
  `Outputs` is [[6,5,4,5,6,5,4], [3,2,1,2,3,2,1], [6,5,4,5,6,5,4], [9,8,7,8,9,8,7], [6,5,4,5,6,5,4]].
4693
4665
  For a more intuitive understanding, please see the example below.
4694
- - If `mode` is "SYMMETRIC", the filling method is similar to the "REFLECT". It is also copied
4666
+ - If `mode` is ``'SYMMETRIC'``, the filling method is similar to ``'REFLECT'``. It is also copied
4695
4667
  according to the symmetry axis, except that it includes the symmetry axis. If the `input_x`
4696
4668
  is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the `Outputs` is
4697
4669
  [[2,1,1,2,3,3,2], [2,1,1,2,3,3,2], [5,4,4,5,6,6,5], [8,7,7,8,9,9,8], [8,7,7,8,9,9,8]].
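Both examples above agree with NumPy's `reflect` and `symmetric` pad modes, which can serve as a cross-check (first output row shown):

>>> import numpy as np
>>> x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
>>> np.pad(x, ((1, 1), (2, 2)), mode='reflect')[0]
array([6, 5, 4, 5, 6, 5, 4])
>>> np.pad(x, ((1, 1), (2, 2)), mode='symmetric')[0]
array([2, 1, 1, 2, 3, 3, 2])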
@@ -5675,7 +5647,7 @@ class KLDivLoss(Primitive):
5675
5647
  - **labels** (Tensor) - The label Tensor which has the same shape and data type as `logits`.
5676
5648
 
5677
5649
  Outputs:
5678
- Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `logits`.
5650
+ Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
5679
5651
  Otherwise it is a scalar.
5680
5652
 
5681
5653
  Raises:
@@ -5750,8 +5722,12 @@ class BinaryCrossEntropy(Primitive):
5750
5722
  - The value of :math:`x` must range from 0 to 1.
5751
5723
 
5752
5724
  Args:
5753
- reduction (str): Specifies the reduction to be applied to the output.
5754
- Its value must be one of ``'none'`` , ``'mean'`` or ``'sum'`` . Default: ``'mean'`` .
5725
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
5726
+ ``'sum'`` . Default: ``'mean'`` .
5727
+
5728
+ - ``'none'``: no reduction will be applied.
5729
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
5730
+ - ``'sum'``: the output elements will be summed.
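A NumPy sketch of the element-wise BCE term with `weight` treated as 1 (illustrative only; note `logits` here are probabilities in [0, 1], as this operator requires):

>>> import numpy as np
>>> logits = np.array([0.2, 0.7, 0.1], dtype=np.float32)
>>> labels = np.array([0., 1., 0.], dtype=np.float32)
>>> l = -(labels * np.log(logits) + (1 - labels) * np.log(1 - logits))
>>> round(float(l.mean()), 4)   # 'none' keeps l as-is; 'sum' is l.sum()
0.2284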
5755
5731
 
5756
5732
  Inputs:
5757
5733
  - **logits** (Tensor) - The predictive value whose data type must be float16 or float32,
@@ -5766,7 +5742,7 @@ class BinaryCrossEntropy(Primitive):
5766
5742
 
5767
5743
  Raises:
5768
5744
  TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
5769
- ValueError: If `reduction` is not one of 'none', 'mean' or 'sum'.
5745
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
5770
5746
  ValueError: If shape of `labels` is not the same as `logits` or `weight` (if given).
5771
5747
  TypeError: If `logits`, `labels` or `weight` is not a Tensor.
5772
5748
 
@@ -7173,7 +7149,19 @@ class Dropout(PrimitiveWithCheck):
7173
7149
 
7174
7150
  Outputs:
7175
7151
  - **output** (Tensor) - With the same shape and data type as `x`.
7176
- - **mask** (Tensor) - With the same shape as `x`.
7152
+ - **mask** (Tensor) - The mask applied to `x`.
7153
+
7154
+ - On GPU and CPU, `mask` has the same shape and data type as `x`.
7155
+ - On Ascend, for better performance, it is represented as a 1-D Tensor
7156
+ with uint8 data type. It has shape :math:`(byte\_counts, )`, where :math:`byte\_counts` is the
7157
+ number of bytes needed to mask the input `x` and is calculated using the
7158
+ following formula:
7159
+
7160
+ .. math::
7161
+
7162
+ byte\_counts = \text{ceil}(\text{prod}(x.shape) / 128) * 16
7163
+
7164
+ If shape of `x` is :math:`(2, 3, 4, 5, 6)`, the shape of `mask` will be :math:`(96, )`.
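The formula is easy to verify in plain Python (helper name ours):

>>> import math
>>> def byte_counts(shape):
...     # bytes in the Ascend bit mask, per the formula above
...     return math.ceil(math.prod(shape) / 128) * 16
>>> byte_counts((2, 3, 4, 5, 6))
96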
7177
7165
 
7178
7166
  Supported Platforms:
7179
7167
  ``Ascend`` ``GPU`` ``CPU``
@@ -7195,6 +7183,7 @@ class Dropout(PrimitiveWithCheck):
7195
7183
  self.seed0 = validator.check_value_type("Seed0", Seed0, [int], self.name)
7196
7184
  self.seed1 = validator.check_value_type("Seed1", Seed1, [int], self.name)
7197
7185
  self.keep_prob = validator.check_float_range(keep_prob, 0, 1, validator.INC_RIGHT, "keep_prob", self.name)
7186
+ self.add_prim_attr("side_effect_hidden", True)
7198
7187
 
7199
7188
  def check_shape(self, x_shape):
7200
7189
  validator.check_int(len(x_shape), 1, validator.GE, "x_shape", self.name)
@@ -7402,6 +7391,9 @@ class CTCGreedyDecoder(Primitive):
7402
7391
 
7403
7392
  Refer to :func:`mindspore.ops.ctc_greedy_decoder` for more details.
7404
7393
 
7394
+ Note:
7395
+ On Ascend, `merge_repeated` cannot be set to ``False``.
7396
+
7405
7397
  Args:
7406
7398
  merge_repeated (bool, optional): If ``True`` , merge repeated classes in output. Default: ``True`` .
7407
7399
 
@@ -7824,6 +7816,10 @@ class LRN(Primitive):
7824
7816
  r"""
7825
7817
  Local Response Normalization.
7826
7818
 
7819
+ .. warning::
7820
+ LRN is deprecated on Ascend due to potential accuracy problems. It is recommended to use other
7821
+ normalization methods, e.g. :class:`mindspore.ops.BatchNorm`.
7822
+
7827
7823
  .. math::
7828
7824
 
7829
7825
  b_{c} = a_{c}\left(k + \frac{\alpha}{n}
@@ -7854,7 +7850,7 @@ class LRN(Primitive):
7854
7850
  TypeError: If `x` is not a Tensor.
7855
7851
 
7856
7852
  Supported Platforms:
7857
- ``Ascend`` ``GPU`` ``CPU``
7853
+ ``GPU`` ``CPU``
7858
7854
 
7859
7855
  Examples:
7860
7856
  >>> import mindspore
@@ -7908,21 +7904,22 @@ class AvgPool3D(Primitive):
7908
7904
  strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
7909
7905
  the depth, height and width of movement are both strides, or a tuple of three int numbers that
7910
7906
  represent depth, height and width of movement respectively. Default: ``1`` .
7911
- pad_mode (str): The optional value for pad mode, is ``"same"`` , ``"valid"`` , ``"pad"`` .
7912
- Default: ``"valid"`` .
7913
-
7914
- - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be the same
7915
- as the input. The total number of padding will be calculated in depth, horizontal and vertical
7916
- directions and evenly distributed to head and tail, top and bottom, left and right if possible.
7917
- Otherwise, the last extra padding will be done from the tail, bottom and the right side.
7907
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
7908
+ ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
7909
+
7910
+ - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
7911
+ are the same when `stride` is set to ``1``.
7912
+ The amount of padding is calculated by the operator internally. If the amount is even,
7913
+ it is uniformly distributed around the input; if it is odd, the excess amount goes
7914
+ to the front/right/bottom side.
7918
7915
  If this mode is set, `pad` must be 0.
7916
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
7917
+ possible depth, height and width. Extra pixels that could not complete a full stride will
7918
+ be discarded. If this mode is set, `pad` must be 0.
7919
+ - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
7920
+ in the depth, height and width dimension is determined by the `pad` parameter.
7921
+ If this mode is set, `pad` must be greater than or equal to 0.
7919
7922
 
7920
- - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
7921
- will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
7922
- must be 0.
7923
-
7924
- - pad: Implicit paddings on both sides of the input in depth, height, width. The number of `pad` will
7925
- be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
7926
7923
  pad (Union(int, tuple[int], list[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer,
7927
7924
  the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
7928
7925
  If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
@@ -8005,74 +8002,97 @@ class AvgPool3D(Primitive):
8005
8002
 
8006
8003
  class Conv3D(Primitive):
8007
8004
  r"""
8008
- Applies a 3D convolution over an input tensor. The input tensor is typically of shape
8009
- :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape
8010
- :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`, where :math:`N` is batch size, :math:`C` is channel number,
8011
- :math:`D` is depth, :math:`H, W` is feature height and width respectively.
8012
- the output value of a layer is calculated as:
8005
+ 3D convolution layer.
8006
+
8007
+ Applies a 3D convolution over an input tensor which is typically of shape
8008
+ :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`,
8009
+ where :math:`N` is batch size, :math:`C` is channel number, :math:`D` is feature depth,
8010
+ :math:`H` is feature height, :math:`W` is feature width.
8011
+
8012
+ The output is calculated based on formula:
8013
8013
 
8014
8014
  .. math::
8015
- \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+
8016
- \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right),
8017
- \operatorname{input}\left(N_{i}, k\right))
8018
-
8019
- where :math:`k` is kernel,
8020
- :math:`ccor` is the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_ ,
8021
- :math:`C_{in}` is the channel number of the input, :math:`out_{j}` corresponds to the :math:`j`-th channel of
8022
- the output and :math:`j` is in the range of :math:`[0, C_{out} - 1]`. :math:`\text{weight}(C_{\text{out}_j}, k)`
8023
- is a convolution kernel slice with shape
8024
- :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
8025
- where :math:`\text{kernel_size[0]}`, :math:`\text{kernel_size[1]}` and :math:`\text{kernel_size[2]}` are
8026
- the depth, height and width of the convolution kernel respectively. :math:`\text{bias}` is the bias parameter
8027
- and :math:`\text{X}` is the input tensor.
8028
- The shape of full convolution kernel is
8029
- :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
8030
- where `groups` is the number of groups to split `input` in the channel dimension.
8031
-
8032
- For more details, please refer to the paper `Gradient Based Learning Applied to Document
8033
- Recognition <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ .
8015
+
8016
+ \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
8017
+ \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
8018
+
8019
+ where :math:`bias` is the output channel bias, :math:`ccor` is
8020
+ the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
8021
+ :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
8022
+
8023
+ Here are the indices' meanings:
8024
+ - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
8025
+
8026
+ - :math:`j` corresponds to the output channel, ranging from 0 to :math:`C_{out}-1`, where :math:`C_{out}` is the
8027
+ number of output channels, which is also equal to the number of kernels.
8028
+
8029
+ - :math:`k` corresponds to the input channel, ranging from 0 to :math:`C_{in}-1`, where :math:`C_{in}` is the
8030
+ number of input channels, which is also equal to the number of channels in the convolutional kernels.
8031
+
8032
+ Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
8033
+ output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
8034
+ kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
8035
+ channel in the :math:`i`-th batch of the input feature map.
8036
+
8037
+ The shape of the convolutional kernel is given by
8038
+ :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
8039
+ where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
8040
+ height and width of the kernel, respectively.
8041
+ If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
8042
+ will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
8043
+ \text{kernel_size[1]}, \text{kernel_size[2]})`,
8044
+ where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
8045
+
8046
+ For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
8047
+ <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
8034
8048
 
8035
8049
  Note:
8036
- On Ascend platform, `group = 1` must be satisfied.
8050
+ 1. On Ascend platform, `group = 1` must be satisfied.
8051
+ 2. On Ascend, `dilation` on depth only supports the case of 1.
8037
8052
 
8038
8053
  Args:
8039
- out_channel (int): The number of output channel :math:`C_{out}`.
8040
- kernel_size (Union[int, tuple[int]]): Specifies the depth, height
8041
- and width of the 3D convolution window. It can be a single int or a tuple of 3 integers.
8042
- Single int means the value is for the depth, height and width
8043
- of the kernel. A tuple of 3 ints corresponds to the depth, height and width of the kernel respectively.
8054
+ out_channel (int): Specifies output channel :math:`C_{out}`.
8055
+ kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel.
8056
+ It can be a single int or a tuple of 3 integers. A single int means the value is for depth, height
8057
+ and the width. A tuple of 3 ints means the first value is for depth and
8058
+ the rest is for the height and width.
8044
8059
  mode (int, optional): Modes for different convolutions. It is currently not used. Default: ``1`` .
8045
8060
  stride (Union[int, tuple[int]], optional): The distance of kernel moving, it can be an int number
8046
8061
  that represents the depth, height and width of movement or a tuple of three int numbers that
8047
8062
  represent depth, height and width movement respectively. Default: ``1`` .
8048
- pad_mode (str, optional): Specifies padding mode. The optional values are
8049
- ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .
8050
-
8051
- - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be equal to
8052
- the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
8053
- left and right directions possiblily.
8054
- Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
8063
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
8064
+ ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
8065
+
8066
+ - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
8067
+ are the same when `stride` is set to ``1``.
8068
+ The amount of padding to is calculated by the operator internally. If the amount is even,
8069
+ it isuniformly distributed around the input, if it is odd, the excess amount goes
8070
+ to the front/right/bottom side.
8055
8071
  If this mode is set, `pad` must be 0.
8056
-
8057
- - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
8058
- will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
8059
- must be 0.
8060
-
8061
- - ``"pad"``: Implicit paddings on both sides of the input in depth, height and width. The number of `pad`
8062
- will be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
8063
-
8064
- pad (Union(int, tuple[int]), optional): The pad value to be filled. Default: ``0`` .
8065
- If `pad` is an integer, the paddings
8066
- of head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of six
8067
- integers, the padding of head, tail, top, bottom, left and right equal to pad[0], pad[1], pad[2],
8068
- pad[3], pad[4] and pad[5] correspondingly.
8069
- dilation (Union[int, tuple[int]], optional): The data type is int or a tuple of 3 integers
8070
- :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1
8071
- on Ascend backend. Specifies the dilation rate to use for dilated convolution. If set :math:`k > 1`,
8072
- there will be :math:`k - 1` pixels skipped for each sampling location.
8073
- The value ranges for the depth, height, and width dimensions are [1, D], [1, H], and [1, W],
8074
- respectively. Default: ``1`` .
8075
- group (int, optional):The number of groups into which the filter is divided. `in_channels`
8072
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
8073
+ possible depth, height and width. Extra pixels that could not complete a full stride will
8074
+ be discarded. If this mode is set, `pad` must be 0.
8075
+ - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
8076
+ in the depth, height and width dimension is determined by the `pad` parameter.
8077
+ If this mode is set, `pad` must be greater than or equal to 0.
8078
+
8079
+ pad (Union(int, tuple[int]), optional): Specifies the amount of padding to apply on input
8080
+ when `pad_mode` is set to ``"pad"``. It can be a single int or a tuple of 6 ints.
8081
+ If `pad` is one integer, the paddings of head, tail, top, bottom,
8082
+ left and right are the same, equal to `pad`. If `pad` is a tuple with 6 integers, the
8083
+ paddings of head, tail, top, bottom, left and right is equal to pad[0],
8084
+ pad[1], pad[2], pad[3], pad[4] and pad[5] accordingly. Default: ``0`` .
8085
+ dilation (Union[int, tuple[int]], optional): Specifies the dilation rate to use for dilated convolution.
8086
+ It can be a single int or a tuple of 3 integers. A single int means the dilation size is the same
8087
+ in the depth, height and width directions. A tuple of 3 ints represents the dilation size in
8088
+ the depth, height and width directions, respectively.
8089
+ Assuming :math:`dilation=(d0, d1, d2)`, the convolutional kernel samples the input with a
8090
+ spacing of :math:`d0-1` elements in the depth direction,
8091
+ :math:`d1-1` elements in the height direction, :math:`d2-1` elements in the
8092
+ width direction respectively. The values in the depth, height and width dimensions are in the
8093
+ ranges [1, D], [1, H] and [1, W], respectively.
8094
+ Default: ``1`` .
8095
+ group (int, optional): The number of groups into which the filter is divided. `in_channels`
8076
8096
  and `out_channels` must be divisible by `group`. Default: ``1`` .
8077
8097
  data_format (str, optional): The optional value for data format. Currently only support ``"NCDHW"`` .
8078
8098
 
@@ -8088,7 +8108,7 @@ class Conv3D(Primitive):
8088
8108
  Outputs:
8089
8109
  Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
8090
8110
 
8091
- `pad_mode` is 'same':
8111
+ `pad_mode` is ``"same"``:
8092
8112
 
8093
8113
  .. math::
8094
8114
  \begin{array}{ll} \\
@@ -8097,7 +8117,7 @@ class Conv3D(Primitive):
8097
8117
  W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
8098
8118
  \end{array}
8099
8119
 
8100
- `pad_mode` is 'valid':
8120
+ `pad_mode` is ``"valid"``:
8101
8121
 
8102
8122
  .. math::
8103
8123
  \begin{array}{ll} \\
@@ -8109,15 +8129,15 @@ class Conv3D(Primitive):
8109
8129
  {\text{stride[2]}} + 1} \right \rfloor \\
8110
8130
  \end{array}
8111
8131
 
8112
- `pad_mode` is 'pad':
8132
+ `pad_mode` is ``"pad"``:
8113
8133
 
8114
8134
  .. math::
8115
8135
  \begin{array}{ll} \\
8116
- D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
8136
+ D_{out} = \left \lfloor{\frac{D_{in} + pad[0] + pad[1] - \text{dilation[0]} \times
8117
8137
  (\text{kernel_size[0]} - 1) - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
8118
- H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
8138
+ H_{out} = \left \lfloor{\frac{H_{in} + pad[2] + pad[3] - \text{dilation[1]} \times
8119
8139
  (\text{kernel_size[1]} - 1) - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
8120
- W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
8140
+ W_{out} = \left \lfloor{\frac{W_{in} + pad[4] + pad[5] - \text{dilation[2]} \times
8121
8141
  (\text{kernel_size[2]} - 1) - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
8122
8142
  \end{array}
8123
8143
 
@@ -8138,12 +8158,56 @@ class Conv3D(Primitive):
8138
8158
  >>> import mindspore
8139
8159
  >>> import numpy as np
8140
8160
  >>> from mindspore import Tensor, ops
8161
+ >>> # case 1: specify kernel_size with tuple, all parameters use default values.
8141
8162
  >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
8142
8163
  >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
8143
8164
  >>> conv3d = ops.Conv3D(out_channel=32, kernel_size=(4, 3, 3))
8144
8165
  >>> output = conv3d(x, weight)
8145
8166
  >>> print(output.shape)
8146
8167
  (16, 32, 7, 30, 30)
8168
+ >>> # case 2: specify kernel_size with int, all parameters use default values.
8169
+ >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
8170
+ >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
8171
+ >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3)
8172
+ >>> output = conv3d(x, weight)
8173
+ >>> print(output.shape)
8174
+ (10, 40, 30, 30, 30)
8175
+ >>> # case 3: stride=(1, 2, 3), other parameters being default.
8176
+ >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
8177
+ >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
8178
+ >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, stride=(1, 2, 3))
8179
+ >>> output = conv3d(x, weight)
8180
+ >>> print(output.shape)
8181
+ (10, 40, 30, 15, 10)
8182
+ >>> # case 4: pad_mode="pad", other parameters being default.
8183
+ >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
8184
+ >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
8185
+ >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, pad_mode="pad", pad=2)
8186
+ >>> output = conv3d(x, weight)
8187
+ >>> print(output.shape)
8188
+ (10, 40, 34, 34, 34)
8189
+ >>> # case 5: dilation=(1, 1, 1), other parameters being default.
8190
+ >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
8191
+ >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
8192
+ >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, dilation=(1, 1, 1))
8193
+ >>> output = conv3d(x, weight)
8194
+ >>> print(output.shape)
8195
+ (10, 40, 30, 30, 30)
8196
+ >>> # case 6: group=1, other parameters being default.
8197
+ >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
8198
+ >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
8199
+ >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, group=1)
8200
+ >>> output = conv3d(x, weight)
8201
+ >>> print(output.shape)
8202
+ (10, 40, 30, 30, 30)
8203
+ >>> # case 7: All parameters are specified.
8204
+ >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
8205
+ >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
8206
+ >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, stride=(1, 2, 3), pad_mode="pad",
8207
+ ... pad=2, dilation=(1), group=1)
8208
+ >>> output = conv3d(x, weight)
8209
+ >>> print(output.shape)
8210
+ (10, 40, 34, 17, 12)
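Case 7 can be cross-checked against the ``"pad"`` formula above with plain integer arithmetic (helper ours; symmetric padding of 2 on every side assumed):

>>> def out_size(size, kernel, stride, pad, dilation=1):
...     return (size + 2 * pad - dilation * (kernel - 1) - 1) // stride + 1
>>> [out_size(32, 3, s, 2) for s in (1, 2, 3)]   # depth, height, width strides
[34, 17, 12]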
8147
8211
  """
8148
8212
 
8149
8213
  @prim_attr_register
@@ -8218,8 +8282,22 @@ class Conv3DBackpropInput(Primitive):
8218
8282
  out_channel (int): The dimension of the output.
8219
8283
  kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution.
8220
8284
  mode (int): Modes for different convolutions. Not currently used.
8221
- pad_mode (str): Modes to fill padding. It could be ``"valid"`` , ``"same"`` , or ``"pad"`` .
8222
- Default: ``"valid"`` .
8285
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
8286
+ ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
8287
+
8288
+ - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
8289
+ are the same when `stride` is set to ``1``.
8290
+ The amount of padding is calculated by the operator internally. If the amount is even,
8291
+ it is uniformly distributed around the input; if it is odd, the excess amount goes
8292
+ to the front/right/bottom side.
8293
+ If this mode is set, `pad` must be 0.
8294
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
8295
+ possible depth, height and width. Extra pixels that could not complete a full stride will
8296
+ be discarded. If this mode is set, `pad` must be 0.
8297
+ - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
8298
+ in the depth, height and width dimension is determined by the `pad` parameter.
8299
+ If this mode is set, `pad` must be greater than or equal to 0.
8300
+
8223
8301
  pad (Union(int, tuple[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the
8224
8302
  paddings of head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a
8225
8303
  tuple of six integers, the paddings of head, tail, top, bottom, left and right are equal to pad[0],
@@ -8443,13 +8521,14 @@ class CTCLossV2(Primitive):
 
 Args:
 blank (int, optional): The blank label. Default: ``0`` .
- reduction (str, optional): Apply specific reduction method to the output. Currently only support ``'none'`` ,
- not case sensitive. Default: ``"none"`` .
+ reduction (str, optional): Apply specific reduction method to the output. Currently only ``'none'``
+ is supported. Default: ``'none'`` .
+
 zero_infinity (bool, optional): If loss is infinite, this parameter determines whether to set that loss
 and its correlated gradient to zero. Default: ``False`` .
 
 Inputs:
- - **log_probs** (Tensor) - A tensor of shape :math:`(T, C, N)`, where :math:`T` is input length, :math:`N` is
+ - **log_probs** (Tensor) - A tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is
 batch size and :math:`C` is number of classes (including blank). Supported dtypes: float32, float64.
 - **targets** (Tensor) - A tensor of shape :math:`(N, S)`, where :math:`S` is max target length,
 means the target sequences. Supported dtypes: int32, int64.
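A hedged usage sketch of the corrected time-major :math:`(T, N, C)` layout (the `input_lengths`/`target_lengths` inputs and the pair of outputs are assumptions based on the usual CTC interface; they are not shown in this hunk):

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> T, N, C, S = 10, 2, 4, 5
>>> log_probs = Tensor(np.log(np.full((T, N, C), 0.25, dtype=np.float32)))  # (T, N, C)
>>> targets = Tensor(np.ones((N, S), dtype=np.int32))                       # blank=0 excluded
>>> input_lengths = Tensor(np.full((N,), T, dtype=np.int32))
>>> target_lengths = Tensor(np.full((N,), S, dtype=np.int32))
>>> ctc = ops.CTCLossV2(blank=0, reduction='none')
>>> neg_log_likelihood, log_alpha = ctc(log_probs, targets, input_lengths, target_lengths)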
@@ -8601,35 +8680,37 @@ class Conv3DTranspose(Primitive):
 Single int means the value is for the depth, height and width of the kernel.
 A tuple of 3 ints means the first value is for the depth, the second value is for the height and the
 other is for the width of the kernel.
- mode (int): Modes for different convolutions. Default is ``1`` . It is currently not used.
- pad_mode (str): Specifies padding mode. The optional values are
- ``"same"`` , ``"valid"`` , ``"pad"`` . Default: ``"valid"`` .
-
- - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be equal to
- the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
- left and right directions possiblily.
- Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
+ mode (int, optional): Modes for different convolutions. Default is ``1`` . It is currently not used.
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+ ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
+
+ - ``"same"``: Pad the input around its depth/height/width dimension so that the shapes of input and output
+ are the same when `stride` is set to ``1``.
+ The amount of padding is calculated by the operator internally. If the amount is even,
+ it is uniformly distributed around the input; if it is odd, the excess amount goes
+ to the front/right/bottom side.
 If this mode is set, `pad` must be 0.
-
- - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
- will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
- and `output_padding` must be 0.
-
- - ``"pad"``: Implicit paddings on both sides of the input in depth, height and width. The number of `pad`
- will be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
-
- pad (Union(int, tuple[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the paddings
- of head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of six integers,
- the padding of head, tail, top, bottom, left and right equal to pad[0], pad[1], pad[2], pad[3], pad[4]
- and pad[5] correspondingly.
- stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+ possible depth, height and width. Extra pixels that could not complete a full stride will
+ be discarded. If this mode is set, `pad` must be 0.
+ - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
+ in the depth, height and width dimensions is determined by the `pad` parameter.
+ If this mode is set, `pad` must be greater than or equal to 0.
+
+ pad (Union(int, tuple[int]), optional): The pad value to be filled. Default: ``0`` . If `pad` is an integer,
+ the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
+ If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal
+ to pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
+ stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
 the depth, height and width of movement are both strides, or a tuple of three int numbers that
 represent depth, height and width of movement respectively. Default: ``1`` .
- dilation (Union(int, tuple[int])): Specifies the space to use between kernel elements. Default: ``1`` .
- group (int): The number of groups into which the filter is divided. `in_channels`
+ dilation (Union(int, tuple[int]), optional): Specifies the space to use between kernel elements.
+ Default: ``1`` .
+ group (int, optional): The number of groups into which the filter is divided. `in_channels`
 and `out_channels` must be divisible by `group`. Default: ``1`` .
- output_padding (Union(int, tuple[int])): Add extra size to each dimension of the output. Default: ``0`` .
- data_format (str): The optional value for data format. Currently only ``'NCDHW'`` is supported.
+ output_padding (Union(int, tuple[int]), optional): Add extra size to each dimension of the output.
+ Default: ``0`` .
+ data_format (str, optional): The optional value for data format. Currently only ``'NCDHW'`` is supported.
 Default: ``'NCDHW'``.
 
 Inputs:
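A small sketch of the transposed-convolution size arithmetic implied by the "pad" mode and `output_padding` described in the hunk above (plain Python; this is the standard transposed-convolution formula, assumed rather than quoted from this diff):

>>> def deconv_out_size(n, k, stride=1, pad=0, dilation=1, output_padding=0):
...     return (n - 1) * stride - 2 * pad + dilation * (k - 1) + 1 + output_padding
>>> deconv_out_size(32, 3)    # stride 1, no padding: 32 -> 34
34
>>> deconv_out_size(32, 3, stride=2, pad=1, output_padding=1)   # a common "double the size" setup
64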
@@ -8794,14 +8875,17 @@ class Dilation2D(Primitive):
 each sampling location. Its value must be greater or equal to 1 and bounded by
 the height and width of the input `x`.
 
- pad_mode (str, optional): Specifies padding mode. The optional values are
- ``"same"`` , ``"valid"`` . Default: ``"same"`` . Both upper and lower case are supported.
+ pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+ ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
 
- - ``"same"``: Adopts the way of completion. The height and width of the output will be the same as
- the input `x`.
+ - ``"same"``: Pad the input around its edges so that the shapes of input and output
+ are the same when `stride` is set to ``1``.
+ The amount of padding is calculated by the operator internally. If the amount is even, it is
+ uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
+ - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+ possible height and width. Extra pixels that could not complete a full stride will
+ be discarded.
 
- - ``"valid"``: Adopts the way of discarding. The possible largest height and width of output will be
- returned without padding. Extra pixels will be discarded.
 data_format (str, optional): The value for data format, only ``'NCHW'`` is supported at present.
 Default: ``"NCHW"`` .
 
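For orientation, a NumPy sketch of the grayscale-dilation arithmetic this operator performs (single channel, stride 1, dilation 1, "valid" mode; the add-then-max semantics are assumed from the TF-style dilation this primitive mirrors):

>>> import numpy as np
>>> x = np.random.rand(6, 6).astype(np.float32)        # input patch
>>> filt = np.random.rand(3, 3).astype(np.float32)     # dilation filter
>>> out = np.empty((4, 4), dtype=np.float32)
>>> for i in range(4):
...     for j in range(4):
...         out[i, j] = (x[i:i + 3, j:j + 3] + filt).max()   # max over the shifted sums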
@@ -8879,7 +8963,11 @@ class Dilation2D(Primitive):
 self.pad_mode = validator.check_string(pad_mode, ['VALID', 'SAME', 'valid', 'same'], 'pad_mode', self.name)
 self.add_prim_attr('pad_mode', self.pad_mode.upper())
 self.stride = _check_format_stride_or_dilation("stride", stride, self.name, self.data_format)
- if self.stride[2] < 1 or self.stride[2] > 255 or self.stride[3] < 1 or self.stride[3] > 255:
+
+ def is_in_range(x):
+ return 1 <= x <= 255
+
+ if not is_in_range(self.stride[2]) or not is_in_range(self.stride[3]):
 raise ValueError(f'For Dilation2D, size of stride is not supported, '
 f'stride should be in the range of [1, 255], '
 f'but got stride_h: `{self.stride[2]}`, stride_w: `{self.stride[3]}`.')
@@ -9418,8 +9506,8 @@ class MultilabelMarginLoss(Primitive):
 ``'sum'`` . Default: ``'mean'`` .
 
 - ``'none'``: no reduction will be applied.
- - ``'mean'``: the sum of the output will be divided by the number of elements in the output.
- - ``'sum'``: the output will be summed.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.
 
 Inputs:
 - **x** (Tensor) - Predict data. Tensor of shape :math:`(C)` or :math:`(N, C)`, where :math:`N`
@@ -9428,7 +9516,7 @@ class MultilabelMarginLoss(Primitive):
 label targets padded by -1.
 
 Outputs:
- - **y** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is "none", its shape
+ - **y** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is ``"none"``, its shape
 is :math:`(N)`. Otherwise, a scalar value will be returned.
 - **is_target** (Tensor) - Output tensor for backward input, with the same shape as `target`,
 data type must be int32.
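A hedged sketch of calling the primitive with the shapes documented above (values illustrative only):

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> loss = ops.MultilabelMarginLoss(reduction='none')
>>> x = Tensor(np.array([[0.1, 0.2, 0.4, 0.8]], dtype=np.float32))   # (N, C)
>>> target = Tensor(np.array([[3, 0, -1, -1]], dtype=np.int32))      # labels padded by -1
>>> y, is_target = loss(x, target)                                   # y: (N,), is_target: (N, C)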
@@ -9694,8 +9782,22 @@ class GridSampler3D(Primitive):
 Args:
 interpolation_mode (str, optional): An optional string specifying the interpolation method.
 The optional values are ``"bilinear"`` or ``"nearest"`` . Default: ``"bilinear"`` .
+
+ - ``"nearest"``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
+ nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
+ - ``"bilinear"``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
+ pixels, computed using bilinear interpolation. This method produces smoother results compared
+ to nearest neighbor interpolation.
+
 padding_mode (str, optional): An optional string specifying the pad method.
 The optional values are ``"zeros"`` , ``"border"`` or ``"reflection"`` . Default: ``"zeros"`` .
+ When the sampling grid is outside input's bounds, effects of various padding modes are as follows:
+
+ - ``"zeros"``: Pads the input tensor with zeros.
+ - ``"border"``: Pads the input tensor with the values of the pixels on the border of the tensor.
+ - ``"reflection"``: Pads the input tensor by reflecting the values of the pixels at the
+ boundary of the tensor.
+
 align_corners (bool, optional): An optional bool specifying alignment method. If set to ``True`` ,
 the extrema (-1 and 1) are considered as referring to
 the center points of the input’s corner pixels. If set to ``False`` , they are instead considered as
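A hedged sketch combining the two attributes described above (shapes follow the usual :math:`(N, C, D, H, W)` input and :math:`(N, D_{out}, H_{out}, W_{out}, 3)` grid layout of 3-D grid sampling; treat them as assumptions, since this hunk shows only the Args section):

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> gs = ops.GridSampler3D(interpolation_mode='bilinear', padding_mode='zeros', align_corners=True)
>>> input_x = Tensor(np.random.rand(1, 1, 4, 4, 4).astype(np.float32))
>>> grid = Tensor(np.random.uniform(-1, 1, (1, 2, 2, 2, 3)).astype(np.float32))
>>> output = gs(input_x, grid)   # expected shape: (1, 1, 2, 2, 2)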
@@ -10178,8 +10280,12 @@ class TripletMarginLoss(Primitive):
 p (int, optional): The norm degree for pairwise distance. Default: ``2`` .
 eps (float, optional): Default: ``1e-6`` .
 swap (bool, optional): The distance swap. Default: ``False`` .
- reduction (str, optional): Apply specific reduction method to the
- output: ``"none"`` , ``"mean"`` , ``"sum"`` . Default: ``"mean"`` .
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+ ``'sum'`` . Default: ``'mean'`` .
+
+ - ``'none'``: no reduction will be applied.
+ - ``'mean'``: compute and return the mean of elements in the output.
+ - ``'sum'``: the output elements will be summed.
 
 Inputs:
 - **x** (Tensor) - A sample randomly selected from the training set. Data type must be BasicType.
@@ -10190,7 +10296,7 @@ class TripletMarginLoss(Primitive):
 - **margin** (Tensor) - Make a margin between the positive pair and the negative pair.
 
 Outputs:
- Union[Tensor, Scalar], if `reduction` is "none", its shape is :math:`(N)`.
+ Union[Tensor, Scalar], if `reduction` is ``"none"``, its shape is :math:`(N)`.
 Otherwise, a scalar value will be returned.
 
 Raises:
@@ -10207,7 +10313,7 @@ class TripletMarginLoss(Primitive):
 is bigger than or equal to 8.
 ValueError: If length of shape of `margin` is not 0.
 ValueError: If shape of `x`, `positive` and `negative` cannot broadcast.
- ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
 
 Supported Platforms:
 ``GPU``
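A hedged sketch of the call described by the Inputs section above (GPU only, per the platform note; numbers illustrative):

>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> loss = ops.TripletMarginLoss(p=2, swap=False, reduction='none')
>>> x = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]], dtype=np.float32))
>>> positive = Tensor(np.array([[0.4, 0.6], [0.4, 0.6]], dtype=np.float32))
>>> negative = Tensor(np.array([[0.2, 0.9], [0.3, 0.7]], dtype=np.float32))
>>> margin = Tensor(1.0, mindspore.float32)
>>> output = loss(x, positive, negative, margin)   # shape (2,) since reduction='none'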
@@ -10303,6 +10409,13 @@ class GridSampler2D(Primitive):
 interpolation_mode (str, optional): An optional string specifying the interpolation method.
 The optional values are
 ``"bilinear"`` or ``"nearest"`` . Default: ``"bilinear"`` .
+
+ - ``"nearest"``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
+ nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
+ - ``"bilinear"``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
+ pixels, computed using bilinear interpolation. This method produces smoother results compared
+ to nearest neighbor interpolation.
+
 padding_mode (str, optional): An optional string specifying the pad method.
 The optional values are ``"zeros"`` , ``"border"`` or ``"reflection"`` . Default: ``"zeros"`` .
 When the sampling grid is outside input's bounds, effects of various padding modes are as follows:
@@ -10317,8 +10430,12 @@ class GridSampler2D(Primitive):
 and output tensors are aligned. When set to ``False`` , it is not aligned. Default: ``False`` .
 
 Inputs:
- - **input_x** (Tensor) - A 4-D tensor with dtype of float16, float32 or float64 and shape of
- :math:`(N, C, H_{in}, W_{in})`.
+ - **input_x** (Tensor) - A 4-D tensor with shape
+ :math:`(N, C, H_{in}, W_{in})`. Supported dtypes:
+
+ - Ascend: float16, float32.
+ - GPU/CPU: float16, float32, float64.
+
 - **grid** (Tensor) - A 4-D tensor whose dtype is the same as `input_x` and whose shape is
 :math:`(N, H_{out}, W_{out}, 2)`.
 Used to specify the sampling pixel locations normalized by the input spatial
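A hedged 2-D counterpart sketch, highlighting the "border" padding mode described above (out-of-bounds grid points clamp to the edge pixels):

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> gs = ops.GridSampler2D(interpolation_mode='nearest', padding_mode='border', align_corners=False)
>>> input_x = Tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))
>>> grid = Tensor(np.random.uniform(-1.2, 1.2, (1, 3, 3, 2)).astype(np.float32))
>>> output = gs(input_x, grid)   # expected shape: (1, 1, 3, 3)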
@@ -10409,7 +10526,7 @@ class UpsampleNearest3D(Primitive):
 This operator scale up the volumetric input with specified `output_size` or `scales` factors, using nearest
 neighbor algorithm.
 
- One of `output_size` or `scales` must be given, and can not be specified both.
+ One of `output_size` or `scales` must be given, and they can not both be specified at the same time.
 
 Inputs:
 - **x** (Tensor) - 5D tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
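For integer scale factors the nearest-neighbor semantics are easy to pin down; a NumPy reference (independent of the primitive's exact call signature, which this hunk does not show):

>>> import numpy as np
>>> x = np.arange(8).reshape(1, 1, 2, 2, 2)
>>> up = x.repeat(2, axis=2).repeat(2, axis=3).repeat(2, axis=4)   # scales = (2, 2, 2)
>>> up.shape
(1, 1, 4, 4, 4)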
@@ -11116,46 +11233,45 @@ class Dense(Primitive):
 Applies dense connected operator for the input. The implement of the operation is as:
 
 .. math::
- \text{output} = \text{x} * \text{w} + \text{b},
+ output = x @ w ^ T + b,
 
- where :math:`x` is the input tensor, :math:`\text{w}` is a weight matrix with the same data type as the :math:`x` ,
- and :math:`\text{b}` is a bias vector with the same data type as the :math:`x` (only if has_bias is True).
-
- Args:
- has_bias (bool): Specifies whether the layer uses a bias vector :math:`\text{b}`. Default: True.
+ where :math:`x` is the input tensor, :math:`w` is a weight matrix with the same data type as the :math:`x` ,
+ and :math:`b` is a bias vector with the same data type as the :math:`x` (only if `b` is not ``None``).
 
 Inputs:
- - **x** (Union[Tensor, Parameter]) - The input tensor with data type of float16, float32 or float64.
- - **w** (Union[Tensor, Parameter]) - The weight tensor with data type of float16, float32 or float64.
- - **b** (Union[Tensor, Parameter]) - The bias tensor with data type of float16, float32 or float64.
+ - **x** (Tensor) - The shape must meet the following requirement: :math:`len(x.shape)>0`.
+ - **w** (Tensor) - The shape must meet the following requirements:
+ If :math:`len(x.shape)>1`, :math:`len(w.shape)=2`. If :math:`len(x.shape)=1`, :math:`len(w.shape)=1`.
+ :math:`w.shape[-1]=x.shape[-1]`.
+ - **b** (Union[Tensor, None]) - If `b` is not ``None``, the shape must meet the following requirements:
+ If :math:`len(x.shape)>1`, :math:`len(b.shape)=0` or :math:`len(b.shape)=1` .
+ If :math:`len(b.shape)=1`, :math:`b.shape[0]=w.shape[0]`.
+ If :math:`len(x.shape)=1`, :math:`len(b.shape)=0`.
 
 Outputs:
- Tensor of shape :math:`(*x.shape[:-1], w.shape[0])`.
-
- Raises:
- TypeError: If `has_bias` is not a bool.
+ If :math:`len(x.shape)>1`, Tensor of shape :math:`(*x.shape[:-1], w.shape[0])`.
+ If :math:`len(x.shape)=1`, Tensor of shape :math:`()`.
 
 Supported Platforms:
- ``GPU``
+ ``Ascend`` ``GPU`` ``CPU``
 
 Examples:
- >>> from mindspore.ops.operations import nn_ops
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
 >>> x = Tensor(np.random.random((4, 5, 6, 7)).astype(np.float32))
- >>> weight = Parameter(np.random.random((6, 7)).astype(np.float32))
- >>> bias = Parameter(np.random.random((6,)).astype(np.float32))
- >>> dense = nn_ops.Dense()
+ >>> weight = Tensor(np.random.random((6, 7)).astype(np.float32))
+ >>> bias = Tensor(np.random.random((6,)).astype(np.float32))
+ >>> dense = ops.Dense()
 >>> output = dense(x, weight, bias)
 >>> print(output.shape)
 (4, 5, 6, 6)
 """
 
 @prim_attr_register
- def __init__(self, has_bias=True):
+ def __init__(self):
 """Initialize Dense."""
 self.init_prim_io_names(inputs=['x', 'w', 'b'], outputs=["output"])
- self.has_bias = has_bias
- self.has_bias = validator.check_bool(has_bias, "has_bias", "Dense")
- self.add_prim_attr("has_bias", self.has_bias)
+ self.add_prim_attr("has_bias", True)
 
 
 class WKV(Primitive):
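The new 1-D case in the Dense hunk above reduces to a dot product; a hedged sketch (passing `b` as None is assumed to be allowed per the Union[Tensor, None] annotation):

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x1 = Tensor(np.ones(7).astype(np.float32))
>>> w1 = Tensor(np.ones(7).astype(np.float32))
>>> out = ops.Dense()(x1, w1, None)   # scalar output, shape ()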
@@ -11166,22 +11282,22 @@ class WKV(Primitive):
 
 Inputs:
 - **w** (Tensor) - The time_first tensor with data type of float32.
- Input tensor of shape :math:`(hidden_size,)`.
+ Input tensor of shape :math:`(hidden\_size,)`.
 - **u** (Tensor]) - The time_decay tensor with data type of float32.
- Input tensor of shape :math:`(hidden_size,)`.
+ Input tensor of shape :math:`(hidden\_size,)`.
 - **k** (Tensor) - The key tensor with data type of float32.
- Input tensor of shape :math:`(batch_size, seq_length, hidden_size)`.
+ Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
 - **v** (Tensor) - The value tensor with data type of float32.
- Input tensor of shape :math:`(batch_size, seq_length, hidden_size)`.
+ Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
 - **sp** (Tensor) - The states_p tensor with data type of float32.
- Input tensor of shape :math:`(batch_size, seq_length, hidden_size)`.
+ Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
 - **sq** (Tensor) - The states_q tensor with data type of float32.
- Input tensor of shape :math:`(batch_size, hidden_size)`.
+ Input tensor of shape :math:`(batch\_size, hidden\_size)`.
 - **sm** (Tensor) - The states_m tensor with data type of float32.
- Input tensor of shape :math:`(batch_size, hidden_size)`.
+ Input tensor of shape :math:`(batch\_size, hidden\_size)`.
 
 Outputs:
- Tensor of shape :math:`(batch_size, seq_length, hidden_size)`.
+ Tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
 
 Supported Platforms:
 ``Ascend``
@@ -11209,3 +11325,255 @@ class WKV(Primitive):
 """Initialize WKV."""
 self.init_prim_io_names(inputs=["time_first", "time_decay", "key", "value", "sp", "sq", "sm"],
 outputs=["output", "out_sp", "out_sq", "out_sm"])
+
+
+ class PromptFlashAttention(Primitive):
+ r"""
+ The interface for full inference.
+ B -- Batch size
+ S -- Sequence length
+ H -- Hidden size
+
+ Refer to :func:`mindspore.ops.prompt_flash_attention` for more detail.
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
+ Args:
+ num_heads (int): The number of heads.
+ scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
+ Muls in the calculation. Default: 1.0.
+ pre_tokens (int): Previous tokens. Default: 2147483547.
+ next_tokens (int): Next tokens, indicating the number of data blocks in the upper triangle that are
+ involved in the calculation. The value 0 indicates that the data blocks in the upper triangle are
+ not involved in the calculation. Default: 0.
+ input_layout (str): The data layout of the input qkv, supports ``"BSH"`` and ``"BNSD"``. Default: ``"BSH"``.
+ num_key_value_heads (int): Head numbers of key/value, used in the GQA algorithm.
+ The value 0 indicates that key and value have the same head numbers as `num_heads`. Default: 0.
+ sparse_mode (int): Default: 0.
+
+ Inputs:
+ - **query** (Tensor) - The query tensor with data type of float16 or float32.
+ Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+ - **key** (Tensor) - The key tensor with data type of float16 or float32.
+ Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+ - **value** (Tensor) - The value tensor with data type of float16 or float32.
+ Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+ - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
+ For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
+ - **actual_seq_lengths** (Tensor) - Describe actual sequence length of each input with data type of int.
+ - **actual_seq_lengths_kv** (Tensor) - Describe actual sequence length of each input with data type of int.
+ - **padding_mask** (Tensor) - The padding mask tensor with data type of float16 or float32.
+ - **deq_scale1** (Tensor)
+ - **quant_scale1** (Tensor)
+ - **deq_scale2** (Tensor)
+ - **quant_scale2** (Tensor)
+ - **quant_offset2** (Tensor)
+
+
+ Outputs:
+ - **attention_out** (Tensor) - Output tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore.ops.operations.nn_ops as P
+ >>> from mindspore import Tensor
+ >>> import numpy as np
+ >>> B = 1
+ >>> N = 16
+ >>> S = 256
+ >>> D = 16
+ >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+ >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+ >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+ >>> pfa = P.PromptFlashAttention(N, input_layout='BNSD')
+ >>> out = pfa(query, key, value, None, None, None, None, None, None, None, None, None)
+ >>> print(out[0].shape)
+ (1, 16, 256, 16)
+ """
+
+ @prim_attr_register
+ def __init__(self, num_heads, scale_value=1.0, pre_tokens=2147483547, next_tokens=0, input_layout='BSH',
+ num_key_value_heads=0, sparse_mode=0):
+ """Initialize PromptFlashAttention."""
+ validator.check_value_type('num_heads', num_heads, [int], self.name)
+ validator.check_value_type('scale_value', scale_value, [float], self.name)
+ validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
+ validator.check_value_type('next_tokens', next_tokens, [int], self.name)
+ validator.check_value_type('input_layout', input_layout, [str], self.name)
+ validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
+ validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
+ self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths",
+ "actual_seq_lengths_kv", "padding_mask", "deq_scale1", "quant_scale1",
+ "deq_scale2", "quant_scale2", "quant_offset2"],
+ outputs=["attention_out"])
+
+
+ class FlashAttentionScore(Primitive):
+ r"""
+ FlashAttentionScore.
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
+ B -- Batch size
+ S1 -- Sequence length of query
+ S2 -- Sequence length of key and value
+ N1 -- Num heads of query
+ N2 -- Num heads of key and value, and N2 must be a factor of N1
+ D -- Head size
+ H1 -- Hidden size of query, which equals N1 * D
+ H2 -- Hidden size of key and value, which equals N2 * D
+
+ Args:
+ head_num (int): The head num of query.
+ keep_prob (float): The keep probability of dropout. Default: 1.0.
+ scale_value (float): The scale value. Default: 1.0.
+ pre_tokens (int): Previous tokens. Default: 65536.
+ next_tokens (int): Next tokens. Default: 65536.
+ inner_precise (int): Specify the execution mode, where 0 indicates high precision mode and 1 indicates high
+ performance mode. Only 0 is supported currently. Default: 0.
+ input_layout (str, optional): Specifies the layout of `query`, the value must be one of ["BSH", "BNSD"].
+ Default: "BSH".
+ sparse_mode (int): Default: 0.
+
+ Inputs:
+ - **query** (Tensor[float16, float32, bfloat16]) - The query tensor.
+ Input tensor of shape :math:`(B, S1, H1)` or `(B, N1, S1, D)`.
+ - **key** (Tensor[float16, float32, bfloat16]) - The key tensor.
+ Input tensor of shape :math:`(B, S2, H2)` or `(B, N2, S2, D)`.
+ - **value** (Tensor[float16, float32, bfloat16]) - The value tensor.
+ Input tensor of shape :math:`(B, S2, H2)` or `(B, N2, S2, D)`.
+ - **real_shift** (Tensor[float16, float32, bfloat16], None) - The position embedding code.
+ Input tensor of shape :math:`(B, N1, S1, S2)` or `(B, N1, 1, S2)`.
+ - **drop_mask** (Tensor[uint8], None) - The dropout mask tensor.
+ Input tensor of shape :math:`(B, N1, S1, S2 // 8)` or None.
+ - **padding_mask** (None) - The padding mask of float16 or float32, not implemented yet.
+ - **attn_mask** (Tensor[uint8], None) - The attention mask tensor.
+ For each element, 0 indicates retention and 1 indicates discard.
+ Input tensor of shape :math:`(B, N1, S1, S2)`, `(B, 1, S1, S2)` or `(S1, S2)`.
+ - **prefix** (Tensor[int64], None) - Not implemented yet.
+ Input tensor of shape :math:`(B,)`.
+
+ Outputs:
+ - **softmax_max** (Tensor[float32]) - Output tensor of shape :math:`(B, N1, S1, 8)`.
+ - **softmax_sum** (Tensor[float32]) - Output tensor of shape :math:`(B, N1, S1, 8)`.
+ - **softmax_out** (Tensor[float32]) - Useless output, ignore it. Output tensor of shape :math:`()`.
+ - **attention_out** (Tensor[float16, float32, bfloat16]) - The output of attention; its shape and data type
+ are the same as the query.
+
+ Supported Platforms:
+ ``Ascend``
+ """
+
+ @prim_attr_register
+ def __init__(self, head_num, keep_prob=1.0, scale_value=1.0, pre_tokens=65536, next_tokens=65536, inner_precise=0,
+ input_layout="BSH", sparse_mode=0):
+ """Initialize FlashAttentionScore"""
+ validator.check_value_type('head_num', head_num, [int], self.name)
+ validator.check_value_type('keep_prob', keep_prob, [int, float], self.name)
+ validator.check_float(keep_prob, 0.0, validator.GE, "keep_prob", self.name)
+ validator.check_float(keep_prob, 1.0, validator.LE, "keep_prob", self.name)
+ validator.check_value_type('scale_value', scale_value, [float], self.name)
+ validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
+ validator.check_value_type('next_tokens', next_tokens, [int], self.name)
+ validator.check_value_type('inner_precise', inner_precise, [int], self.name)
+ validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
+ if inner_precise not in [0]:
+ raise ValueError(f"Attribute 'inner_precise' must be 0, but got {inner_precise}")
+ validator.check_value_type('input_layout', input_layout, [str], self.name)
+ if input_layout not in ["BSH", "BNSD"]:
+ raise ValueError(f"Attribute 'input_layout' must be either 'BSH' or 'BNSD', but got {input_layout}")
+ self.init_prim_io_names(
+ inputs=['query', 'key', 'value', 'real_shift', 'drop_mask', 'padding_mask', 'attn_mask', 'prefix'],
+ outputs=['softmax_max', 'softmax_sum', 'softmax_out', 'attention_out'])
+
+
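FlashAttentionScore ships without an Examples block; a hedged sketch of what a call might look like on Ascend, based solely on the signature and the IO names registered above (shapes follow the BNSD layout; the None placeholders for the five optional inputs are an untested assumption mirroring the PromptFlashAttention example):

>>> import numpy as np
>>> import mindspore.ops.operations.nn_ops as P
>>> from mindspore import Tensor
>>> B, N1, S, D = 1, 8, 128, 16
>>> query = Tensor(np.random.rand(B, N1, S, D).astype(np.float16))
>>> key = Tensor(np.random.rand(B, N1, S, D).astype(np.float16))
>>> value = Tensor(np.random.rand(B, N1, S, D).astype(np.float16))
>>> fas = P.FlashAttentionScore(head_num=N1, input_layout='BNSD')
>>> _, _, _, attention_out = fas(query, key, value, None, None, None, None, None)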
+ class RmsNorm(Primitive):
+ r"""
+ The RmsNorm operator is a normalization operation, and its formula is:
+
+ .. math::
+ y_i=\frac{x_i}{\sqrt{\frac{1}{n}\sum_{j=1}^{n}x_j^2+\varepsilon}}\gamma_i
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
+ Args:
+ epsilon (float): A value added to the denominator to prevent division by 0. Default: `1e-6`.
+
+ Inputs:
+ - **input_x** (Tensor) - Input data of RmsNorm, supported data types: float16, float32, bfloat16.
+ - **gamma** (Tensor) - Supported data types: float16, float32, bfloat16.
+
+ Outputs:
+ - **y** (Tensor) - Has the same type and shape as `input_x`.
+ - **rstd** (Tensor) - Has the same type as `input_x`, used by gradient calculation.
+
+ Raises:
+ TypeError: If data type of `input_x` is not one of the following: float16, float32, bfloat16.
+ TypeError: If data type of `gamma` is not one of the following: float16, float32, bfloat16.
+ TypeError: If data type of `input_x` is not the same as the data type of `gamma`.
+
+ Supported Platforms:
+ ``Ascend``
+ """
+
+ @prim_attr_register
+ def __init__(self, epsilon=1e-6):
+ """Initialize RmsNorm."""
+ validator.check_value_type("epsilon", epsilon, [float], self.name)
+ self.init_prim_io_names(inputs=['x', 'gamma'], outputs=["y", "rstd"])
+
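A NumPy reference for the corrected formula above (reduction over the last axis assumed):

>>> import numpy as np
>>> x = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
>>> gamma = np.ones(3, dtype=np.float32)
>>> eps = 1e-6
>>> rstd = 1.0 / np.sqrt((x ** 2).mean(axis=-1, keepdims=True) + eps)   # the second output
>>> y = x * rstd * gamma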
+
+ class PagedAttention(Primitive):
+ r"""
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+ """
+ @prim_attr_register
+ def __init__(self, head_num, scale_value=1.0, kv_head_num=0):
+ """Initialize PagedAttention"""
+ validator.check_value_type('head_num', head_num, [int], self.name)
+ validator.check_value_type('scale_value', scale_value, [float], self.name)  # scale after qkbmm
+ validator.check_value_type('kv_head_num', kv_head_num, [int], self.name)  # for MQA
+ self.init_prim_io_names(
+ inputs=['query', 'key_cache', 'value_cache', 'block_tables', 'context_lens'],
+ outputs=['attention_out'])
+
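PagedAttention carries no docstring beyond the warning; a hedged NumPy sketch of the paged-KV lookup implied by its 'block_tables' and 'key_cache' input names (the block size and cache layout below are assumptions borrowed from paged-attention serving stacks, not from this diff):

>>> import numpy as np
>>> num_blocks, block_size, hidden = 16, 4, 8
>>> key_cache = np.random.rand(num_blocks, block_size, hidden).astype(np.float32)
>>> block_table = np.array([3, 7])                         # logical -> physical blocks of one sequence
>>> seq_keys = key_cache[block_table].reshape(-1, hidden)  # contiguous keys for attention, (8, hidden)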
+
+ class PagedAttentionMask(Primitive):
+ r"""
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+ """
+ @prim_attr_register
+ def __init__(self, head_num, scale_value=1.0, kv_head_num=0):
+ """Initialize PagedAttentionMask"""
+ validator.check_value_type('head_num', head_num, [int], self.name)
+ validator.check_value_type('scale_value', scale_value, [float], self.name)  # scale after qkbmm
+ validator.check_value_type('kv_head_num', kv_head_num, [int], self.name)  # for MQA
+ self.init_prim_io_names(
+ inputs=['query', 'key_cache', 'value_cache', 'block_tables', 'context_lens', 'alibi_mask'],
+ outputs=['attention_out'])
+
+
+ class ReshapeAndCache(Primitive):
+ r"""
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+ """
+ __mindspore_signature__ = (
+ sig.make_sig('key', dtype=sig.sig_dtype.T),
+ sig.make_sig('value', dtype=sig.sig_dtype.T),
+ sig.make_sig('key_cache', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
+ sig.make_sig('value_cache', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
+ sig.make_sig('slot_mapping', dtype=sig.sig_dtype.T1),
+ )
+
+ @prim_attr_register
+ def __init__(self):
+ """Initialize ReshapeAndCache"""
+ self.init_prim_io_names(
+ inputs=['key', 'value', 'key_cache', 'value_cache', 'slot_mapping'],
+ outputs=['key_out'])
+ self.add_prim_attr('side_effect_mem', True)
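ReshapeAndCache likewise documents only input names; a hedged NumPy sketch of the in-place scatter that its RW_WRITE signature entries and 'side_effect_mem' attribute suggest (the flat slot layout is an assumption):

>>> import numpy as np
>>> num_slots, hidden = 8, 4
>>> key_cache = np.zeros((num_slots, hidden), dtype=np.float32)
>>> key = np.ones((2, hidden), dtype=np.float32)   # two new tokens
>>> slot_mapping = np.array([5, 2])                # destination slot of each token
>>> key_cache[slot_mapping] = key                  # in-place update, mirroring the memory side effect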