mindspore-1.10.0-cp37-cp37m-win_amd64.whl → mindspore-2.0.0rc1-cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic; see the release notes on the registry page for more details.

Files changed (966) hide show
  1. mindspore/.commit_id +1 -1
  2. mindspore/ConcurrencyCheck.dll +0 -0
  3. mindspore/CppBuildInsights.dll +0 -0
  4. mindspore/CppCoreCheck.dll +0 -0
  5. mindspore/EnumIndex.dll +0 -0
  6. mindspore/EspXEngine.dll +0 -0
  7. mindspore/HResultCheck.dll +0 -0
  8. mindspore/KernelTraceControl.dll +0 -0
  9. mindspore/LocalESPC.dll +0 -0
  10. mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
  11. mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
  12. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  13. mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
  14. mindspore/Newtonsoft.Json.dll +0 -0
  15. mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
  16. mindspore/VariantClear.dll +0 -0
  17. mindspore/__init__.py +9 -4
  18. mindspore/_c_dataengine.cp37-win_amd64.pyd +0 -0
  19. mindspore/_c_expression.cp37-win_amd64.pyd +0 -0
  20. mindspore/_c_mindrecord.cp37-win_amd64.pyd +0 -0
  21. mindspore/_check_jit_forbidden_api.py +102 -0
  22. mindspore/_checkparam.py +1066 -1001
  23. mindspore/_extends/builtin_operations.py +32 -4
  24. mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
  25. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
  26. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
  27. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
  28. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
  29. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
  30. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  31. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
  32. mindspore/_extends/parse/__init__.py +5 -3
  33. mindspore/_extends/parse/namespace.py +17 -2
  34. mindspore/_extends/parse/parser.py +193 -34
  35. mindspore/_extends/parse/resources.py +7 -8
  36. mindspore/_extends/parse/standard_method.py +1780 -435
  37. mindspore/_extends/parse/trope.py +3 -1
  38. mindspore/amp.py +53 -58
  39. mindspore/atlprov.dll +0 -0
  40. mindspore/boost/adasum.py +3 -2
  41. mindspore/boost/boost.py +2 -2
  42. mindspore/boost/boost_cell_wrapper.py +46 -26
  43. mindspore/boost/dim_reduce.py +6 -5
  44. mindspore/boost/grad_accumulation.py +2 -1
  45. mindspore/boost/group_loss_scale_manager.py +1 -1
  46. mindspore/c1.dll +0 -0
  47. mindspore/c1xx.dll +0 -0
  48. mindspore/c2.dll +0 -0
  49. mindspore/cfgpersist.dll +0 -0
  50. mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
  51. mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
  52. mindspore/common/__init__.py +11 -10
  53. mindspore/common/_decorator.py +2 -0
  54. mindspore/common/_register_for_adapter.py +55 -0
  55. mindspore/common/_stub_tensor.py +201 -0
  56. mindspore/common/_utils.py +57 -0
  57. mindspore/common/api.py +582 -297
  58. mindspore/common/dtype.py +66 -18
  59. mindspore/common/dump.py +2 -2
  60. mindspore/common/initializer.py +38 -1
  61. mindspore/common/jit_config.py +25 -13
  62. mindspore/common/mutable.py +53 -24
  63. mindspore/common/parameter.py +60 -37
  64. mindspore/common/seed.py +8 -24
  65. mindspore/common/sparse_tensor.py +927 -0
  66. mindspore/common/tensor.py +1627 -3900
  67. mindspore/communication/__init__.py +10 -5
  68. mindspore/communication/_comm_helper.py +78 -214
  69. mindspore/communication/_hccl_management.py +2 -1
  70. mindspore/communication/management.py +136 -47
  71. mindspore/config/op_info.config +501 -1008
  72. mindspore/context.py +291 -56
  73. mindspore/d3dcompiler_47.dll +0 -0
  74. mindspore/dataset/__init__.py +12 -8
  75. mindspore/dataset/audio/__init__.py +9 -9
  76. mindspore/dataset/audio/transforms.py +1090 -228
  77. mindspore/dataset/audio/utils.py +87 -39
  78. mindspore/dataset/audio/validators.py +223 -1
  79. mindspore/dataset/callback/ds_callback.py +17 -15
  80. mindspore/dataset/core/config.py +246 -17
  81. mindspore/dataset/core/py_util_helpers.py +4 -3
  82. mindspore/dataset/core/validator_helpers.py +10 -10
  83. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  84. mindspore/dataset/debug/debug_hook.py +65 -0
  85. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  86. mindspore/dataset/engine/__init__.py +7 -3
  87. mindspore/dataset/engine/cache_client.py +9 -9
  88. mindspore/dataset/engine/datasets.py +648 -477
  89. mindspore/dataset/engine/datasets_audio.py +165 -167
  90. mindspore/dataset/engine/datasets_standard_format.py +93 -67
  91. mindspore/dataset/engine/datasets_text.py +492 -342
  92. mindspore/dataset/engine/datasets_user_defined.py +85 -50
  93. mindspore/dataset/engine/datasets_vision.py +1224 -699
  94. mindspore/dataset/engine/graphdata.py +134 -69
  95. mindspore/dataset/engine/iterators.py +50 -9
  96. mindspore/dataset/engine/offload.py +52 -31
  97. mindspore/dataset/engine/samplers.py +27 -24
  98. mindspore/dataset/engine/serializer_deserializer.py +14 -15
  99. mindspore/dataset/engine/validators.py +213 -52
  100. mindspore/dataset/text/__init__.py +10 -8
  101. mindspore/dataset/text/transforms.py +152 -57
  102. mindspore/dataset/text/utils.py +98 -49
  103. mindspore/dataset/text/validators.py +25 -0
  104. mindspore/dataset/transforms/__init__.py +4 -2
  105. mindspore/dataset/transforms/c_transforms.py +11 -13
  106. mindspore/dataset/transforms/py_transforms.py +2 -2
  107. mindspore/dataset/transforms/py_transforms_util.py +10 -0
  108. mindspore/dataset/transforms/transforms.py +13 -15
  109. mindspore/dataset/transforms/validators.py +7 -7
  110. mindspore/dataset/utils/__init__.py +2 -1
  111. mindspore/dataset/utils/browse_dataset.py +13 -13
  112. mindspore/dataset/utils/line_reader.py +121 -0
  113. mindspore/dataset/vision/__init__.py +8 -7
  114. mindspore/dataset/vision/c_transforms.py +125 -126
  115. mindspore/dataset/vision/py_transforms.py +37 -37
  116. mindspore/dataset/vision/py_transforms_util.py +23 -20
  117. mindspore/dataset/vision/transforms.py +316 -315
  118. mindspore/dataset/vision/utils.py +313 -17
  119. mindspore/dataset/vision/validators.py +6 -6
  120. mindspore/default_config.py +0 -1
  121. mindspore/dpcmi.dll +0 -0
  122. mindspore/{compression → experimental}/__init__.py +6 -5
  123. mindspore/experimental/map_parameter.py +275 -0
  124. mindspore/include/OWNERS +0 -1
  125. mindspore/include/api/callback/callback.h +9 -13
  126. mindspore/include/api/callback/ckpt_saver.h +2 -2
  127. mindspore/include/api/callback/loss_monitor.h +2 -2
  128. mindspore/include/api/callback/lr_scheduler.h +5 -5
  129. mindspore/include/api/callback/time_monitor.h +2 -2
  130. mindspore/include/api/callback/train_accuracy.h +4 -6
  131. mindspore/include/api/cfg.h +19 -6
  132. mindspore/include/api/context.h +70 -9
  133. mindspore/include/api/delegate.h +8 -1
  134. mindspore/include/api/dual_abi_helper.h +8 -24
  135. mindspore/include/api/metrics/accuracy.h +2 -2
  136. mindspore/include/api/metrics/metrics.h +4 -3
  137. mindspore/include/api/model.h +9 -4
  138. mindspore/include/api/model_group.h +68 -0
  139. mindspore/include/api/model_parallel_runner.h +17 -17
  140. mindspore/include/api/net.h +12 -11
  141. mindspore/include/api/serialization.h +20 -4
  142. mindspore/include/api/status.h +7 -1
  143. mindspore/include/api/types.h +25 -21
  144. mindspore/include/api/visible.h +4 -0
  145. mindspore/include/c_api/model_c.h +5 -0
  146. mindspore/include/c_api/status_c.h +1 -1
  147. mindspore/include/dataset/config.h +1 -1
  148. mindspore/include/dataset/constants.h +14 -0
  149. mindspore/include/dataset/text.h +59 -0
  150. mindspore/include/dataset/vision.h +56 -117
  151. mindspore/include/dataset/vision_lite.h +102 -0
  152. mindspore/jpeg62.dll +0 -0
  153. mindspore/log.py +28 -28
  154. mindspore/mindrecord/common/exceptions.py +2 -4
  155. mindspore/mindrecord/filereader.py +19 -1
  156. mindspore/mindrecord/filewriter.py +250 -88
  157. mindspore/mindrecord/mindpage.py +13 -13
  158. mindspore/mindrecord/shardheader.py +15 -15
  159. mindspore/mindrecord/shardreader.py +9 -0
  160. mindspore/mindrecord/shardwriter.py +29 -29
  161. mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
  162. mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
  163. mindspore/mindrecord/tools/csv_to_mr.py +4 -4
  164. mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
  165. mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
  166. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  167. mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
  168. mindspore/mindspore_common.dll +0 -0
  169. mindspore/mindspore_core.dll +0 -0
  170. mindspore/mindspore_glog.dll +0 -0
  171. mindspore/mindspore_shared_lib.dll +0 -0
  172. mindspore/msobj140.dll +0 -0
  173. mindspore/mspdb140.dll +0 -0
  174. mindspore/mspdbcore.dll +0 -0
  175. mindspore/mspdbst.dll +0 -0
  176. mindspore/mspft140.dll +0 -0
  177. mindspore/msvcdis140.dll +0 -0
  178. mindspore/msvcp140_1.dll +0 -0
  179. mindspore/msvcp140_2.dll +0 -0
  180. mindspore/msvcp140_atomic_wait.dll +0 -0
  181. mindspore/msvcp140_codecvt_ids.dll +0 -0
  182. mindspore/nn/__init__.py +1 -5
  183. mindspore/nn/cell.py +297 -234
  184. mindspore/nn/dynamic_lr.py +1 -1
  185. mindspore/nn/grad/cell_grad.py +17 -42
  186. mindspore/nn/layer/__init__.py +7 -4
  187. mindspore/nn/layer/activation.py +131 -88
  188. mindspore/nn/layer/basic.py +313 -613
  189. mindspore/nn/layer/channel_shuffle.py +103 -0
  190. mindspore/nn/layer/combined.py +1 -1
  191. mindspore/nn/layer/container.py +52 -6
  192. mindspore/nn/layer/conv.py +112 -43
  193. mindspore/nn/layer/dense.py +10 -9
  194. mindspore/nn/layer/embedding.py +36 -34
  195. mindspore/nn/layer/image.py +123 -27
  196. mindspore/nn/layer/math.py +108 -107
  197. mindspore/nn/layer/normalization.py +212 -366
  198. mindspore/nn/layer/padding.py +370 -42
  199. mindspore/nn/layer/pooling.py +1443 -219
  200. mindspore/nn/layer/rnn_cells.py +11 -16
  201. mindspore/nn/layer/rnns.py +38 -39
  202. mindspore/nn/layer/thor_layer.py +24 -25
  203. mindspore/nn/layer/timedistributed.py +5 -5
  204. mindspore/nn/layer/transformer.py +701 -0
  205. mindspore/nn/learning_rate_schedule.py +8 -8
  206. mindspore/nn/loss/__init__.py +9 -6
  207. mindspore/nn/loss/loss.py +678 -142
  208. mindspore/nn/metrics.py +53 -0
  209. mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
  210. mindspore/nn/optim/ada_grad.py +8 -8
  211. mindspore/nn/optim/adadelta.py +2 -3
  212. mindspore/nn/optim/adafactor.py +18 -14
  213. mindspore/nn/optim/adam.py +429 -87
  214. mindspore/nn/optim/adamax.py +5 -6
  215. mindspore/nn/optim/adasum.py +10 -8
  216. mindspore/nn/optim/asgd.py +7 -7
  217. mindspore/nn/optim/ftrl.py +81 -11
  218. mindspore/nn/optim/lamb.py +7 -8
  219. mindspore/nn/optim/lars.py +4 -4
  220. mindspore/nn/optim/lazyadam.py +82 -7
  221. mindspore/nn/optim/momentum.py +8 -7
  222. mindspore/nn/optim/optimizer.py +19 -10
  223. mindspore/nn/optim/proximal_ada_grad.py +6 -5
  224. mindspore/nn/optim/rmsprop.py +3 -3
  225. mindspore/nn/optim/rprop.py +20 -16
  226. mindspore/nn/optim/sgd.py +21 -15
  227. mindspore/nn/optim/thor.py +23 -21
  228. mindspore/nn/probability/__init__.py +0 -2
  229. mindspore/nn/probability/bijector/bijector.py +7 -6
  230. mindspore/nn/probability/bijector/invert.py +4 -2
  231. mindspore/nn/probability/bijector/softplus.py +2 -2
  232. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  233. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  234. mindspore/nn/probability/distribution/__init__.py +6 -0
  235. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
  236. mindspore/nn/probability/distribution/_utils/utils.py +11 -17
  237. mindspore/nn/probability/distribution/bernoulli.py +6 -6
  238. mindspore/nn/probability/distribution/beta.py +1 -1
  239. mindspore/nn/probability/distribution/categorical.py +9 -9
  240. mindspore/nn/probability/distribution/cauchy.py +8 -8
  241. mindspore/nn/probability/distribution/distribution.py +12 -6
  242. mindspore/nn/probability/distribution/exponential.py +5 -5
  243. mindspore/nn/probability/distribution/gamma.py +3 -3
  244. mindspore/nn/probability/distribution/geometric.py +6 -5
  245. mindspore/nn/probability/distribution/gumbel.py +5 -5
  246. mindspore/nn/probability/distribution/half_normal.py +133 -0
  247. mindspore/nn/probability/distribution/laplace.py +128 -0
  248. mindspore/nn/probability/distribution/log_normal.py +0 -1
  249. mindspore/nn/probability/distribution/logistic.py +4 -5
  250. mindspore/nn/probability/distribution/normal.py +11 -15
  251. mindspore/nn/probability/distribution/poisson.py +6 -2
  252. mindspore/nn/probability/distribution/student_t.py +150 -0
  253. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  254. mindspore/nn/probability/distribution/uniform.py +5 -5
  255. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  256. mindspore/nn/reinforcement/tensor_array.py +2 -2
  257. mindspore/nn/sparse/sparse.py +8 -1
  258. mindspore/nn/wrap/cell_wrapper.py +55 -27
  259. mindspore/nn/wrap/grad_reducer.py +20 -11
  260. mindspore/nn/wrap/loss_scale.py +47 -30
  261. mindspore/numpy/array_creations.py +33 -22
  262. mindspore/numpy/array_ops.py +46 -42
  263. mindspore/numpy/logic_ops.py +6 -27
  264. mindspore/numpy/math_ops.py +26 -19
  265. mindspore/numpy/utils.py +1 -8
  266. mindspore/numpy/utils_const.py +112 -62
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/opencv_imgproc452.dll +0 -0
  270. mindspore/ops/__init__.py +6 -3
  271. mindspore/ops/_constants.py +0 -6
  272. mindspore/ops/_grad/__init__.py +2 -1
  273. mindspore/ops/_grad/grad_array_ops.py +209 -152
  274. mindspore/ops/_grad/grad_base.py +55 -17
  275. mindspore/ops/_grad/grad_clip_ops.py +11 -3
  276. mindspore/ops/_grad/grad_comm_ops.py +58 -47
  277. mindspore/ops/_grad/grad_implementations.py +21 -61
  278. mindspore/ops/_grad/grad_inner_ops.py +48 -6
  279. mindspore/ops/_grad/grad_math_ops.py +306 -161
  280. mindspore/ops/_grad/grad_nn_ops.py +192 -181
  281. mindspore/ops/_grad/grad_other_ops.py +1 -1
  282. mindspore/ops/_grad/grad_quant_ops.py +5 -5
  283. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  284. mindspore/ops/_grad/grad_sparse.py +15 -9
  285. mindspore/ops/_grad_experimental/__init__.py +1 -0
  286. mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
  287. mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
  288. mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
  289. mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
  290. mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
  291. mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
  292. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  293. mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
  294. mindspore/ops/_op_impl/__init__.py +3 -3
  295. mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
  296. mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
  297. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  298. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
  299. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  300. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  301. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
  302. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  303. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  304. mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
  305. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  306. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
  307. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  308. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  309. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  310. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  311. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  312. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  313. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  314. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  315. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  316. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  317. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  318. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  319. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  320. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  321. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  322. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  323. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  324. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  325. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
  326. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
  327. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  328. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  329. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  330. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  331. mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
  332. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  333. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  334. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  335. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  336. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  337. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  338. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  339. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  340. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  341. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  342. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  343. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  344. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  345. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  346. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  347. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  348. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  349. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  350. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  351. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  352. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
  353. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  354. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
  355. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  356. mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
  357. mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
  358. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  359. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  360. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  361. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  362. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  363. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  364. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  365. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
  366. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  367. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  368. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  369. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  370. mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
  371. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  372. mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
  373. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  374. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  375. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  376. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  377. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  378. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  379. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  380. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  381. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  382. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  383. mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
  384. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  385. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  386. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  387. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  388. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  389. mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
  390. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  391. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  392. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  393. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  394. mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
  395. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  396. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  397. mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
  398. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  399. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  400. mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
  401. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  402. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  403. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  404. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  405. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  406. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  407. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  408. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  409. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  410. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  411. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  412. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  413. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  414. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  415. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  416. mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
  417. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  418. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  419. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  420. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  421. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  422. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  423. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  424. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  425. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  426. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  427. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  428. mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
  429. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  430. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  431. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  432. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  433. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  434. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  435. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  436. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  437. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  438. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  439. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  440. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  441. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  442. mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
  443. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  444. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  445. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  446. mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
  447. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
  448. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
  449. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  450. mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
  451. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  452. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  453. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  454. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  455. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  456. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  457. mindspore/ops/_op_impl/cpu/__init__.py +1 -2
  458. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  459. mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
  460. mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
  461. mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
  462. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  463. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  464. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  465. mindspore/ops/_op_impl/tbe/__init__.py +27 -608
  466. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
  467. mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
  468. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  469. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  470. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  471. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
  472. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  473. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  474. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
  475. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
  476. mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
  477. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  478. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
  479. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  480. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  481. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  482. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  483. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  484. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
  485. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
  486. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  487. mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
  488. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
  489. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  490. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  491. mindspore/ops/_op_impl/tbe/greater.py +2 -0
  492. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  493. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
  494. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  495. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  496. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
  497. mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
  498. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
  499. mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
  500. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
  501. mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
  502. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
  503. mindspore/ops/_op_impl/tbe/slice.py +26 -15
  504. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  505. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  506. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
  507. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  508. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
  509. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
  510. mindspore/ops/_primitive_cache.py +3 -2
  511. mindspore/ops/_register_for_op.py +11 -0
  512. mindspore/ops/_utils/__init__.py +1 -1
  513. mindspore/ops/_utils/utils.py +20 -41
  514. mindspore/ops/_vmap/__init__.py +2 -2
  515. mindspore/ops/_vmap/vmap_array_ops.py +170 -78
  516. mindspore/ops/_vmap/vmap_base.py +24 -10
  517. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  518. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  519. mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
  520. mindspore/ops/_vmap/vmap_image_ops.py +52 -0
  521. mindspore/ops/_vmap/vmap_math_ops.py +77 -6
  522. mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
  523. mindspore/ops/_vmap/vmap_other_ops.py +3 -1
  524. mindspore/ops/_vmap/vmap_random_ops.py +55 -3
  525. mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
  526. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  527. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  528. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
  529. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
  530. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
  531. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
  532. mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
  533. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  534. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  535. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  536. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
  537. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  538. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
  539. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  540. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  541. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
  542. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
  543. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  544. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  545. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  546. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  547. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  548. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  549. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  550. mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
  551. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  552. mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
  553. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
  554. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  555. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
  556. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  557. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  558. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
  559. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
  560. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  561. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  562. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  563. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
  565. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  566. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  567. mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
  568. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
  569. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  570. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
  571. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
  572. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
  573. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
  574. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
  576. mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
  577. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  578. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  579. mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
  580. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  581. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
  582. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
  583. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
  584. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  585. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  586. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  587. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  588. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  589. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
  590. mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
  591. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
  592. mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
  593. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  594. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
  595. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
  596. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
  597. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  598. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  599. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  600. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  601. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  602. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  603. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  604. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  605. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  606. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  607. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  608. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
  609. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
  610. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
  611. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
  612. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  613. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  614. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  615. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  616. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  617. mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
  618. mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
  619. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  620. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  621. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
  622. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
  623. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
  624. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
  625. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  626. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
  627. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
  628. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
  629. mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
  630. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  631. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  632. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
  633. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
  634. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
  635. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  636. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  637. mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
  638. mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
  639. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  640. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  642. mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
  643. mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
  644. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  645. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  646. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  647. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  648. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  649. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
  650. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
  651. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  652. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  653. mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
  654. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
  655. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
  656. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
  657. mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
  658. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  659. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  660. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
  661. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
  662. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
  663. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  664. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  665. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
  666. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
  667. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
  668. mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
  669. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
  670. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  671. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  672. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
  673. mindspore/ops/bprop_mindir/__init__.py +1 -4
  674. mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
  675. mindspore/ops/composite/__init__.py +12 -13
  676. mindspore/ops/composite/base.py +261 -254
  677. mindspore/ops/composite/env_ops.py +41 -0
  678. mindspore/ops/composite/math_ops.py +197 -156
  679. mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
  680. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
  681. mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
  682. mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
  683. mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
  684. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
  685. mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
  686. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  687. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  688. mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
  689. mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
  690. mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
  691. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
  692. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  693. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
  694. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
  695. mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
  696. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  697. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  698. mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
  699. mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
  700. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
  701. mindspore/ops/function/__init__.py +323 -8
  702. mindspore/ops/function/array_func.py +3511 -780
  703. mindspore/ops/function/clip_func.py +329 -0
  704. mindspore/ops/function/debug_func.py +6 -6
  705. mindspore/ops/function/grad/__init__.py +5 -1
  706. mindspore/ops/function/grad/grad_func.py +736 -65
  707. mindspore/ops/function/image_func.py +270 -0
  708. mindspore/ops/function/linalg_func.py +268 -8
  709. mindspore/ops/function/math_func.py +8032 -3164
  710. mindspore/ops/function/nn_func.py +5619 -1855
  711. mindspore/ops/function/other_func.py +115 -0
  712. mindspore/ops/function/parameter_func.py +11 -10
  713. mindspore/ops/function/random_func.py +939 -77
  714. mindspore/ops/function/sparse_func.py +249 -84
  715. mindspore/ops/function/sparse_unary_func.py +2303 -0
  716. mindspore/ops/function/spectral_func.py +146 -0
  717. mindspore/ops/function/vmap_func.py +114 -0
  718. mindspore/ops/functional.py +182 -254
  719. mindspore/ops/op_info_register.py +79 -34
  720. mindspore/ops/operations/__init__.py +210 -118
  721. mindspore/ops/operations/_csr_ops.py +7 -7
  722. mindspore/ops/operations/_embedding_cache_ops.py +25 -15
  723. mindspore/ops/operations/_grad_ops.py +447 -322
  724. mindspore/ops/operations/_inner_ops.py +547 -176
  725. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  726. mindspore/ops/operations/_ms_kernel.py +29 -27
  727. mindspore/ops/operations/_ocr_ops.py +11 -11
  728. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  729. mindspore/ops/operations/_quant_ops.py +186 -101
  730. mindspore/ops/operations/_rl_inner_ops.py +122 -61
  731. mindspore/ops/operations/_scalar_ops.py +466 -0
  732. mindspore/ops/operations/_sequence_ops.py +1047 -0
  733. mindspore/ops/operations/_tensor_array.py +10 -11
  734. mindspore/ops/operations/_thor_ops.py +4 -4
  735. mindspore/ops/operations/array_ops.py +1428 -1226
  736. mindspore/ops/operations/comm_ops.py +180 -117
  737. mindspore/ops/operations/control_ops.py +4 -2
  738. mindspore/ops/operations/custom_ops.py +185 -98
  739. mindspore/ops/operations/debug_ops.py +92 -54
  740. mindspore/ops/operations/image_ops.py +406 -211
  741. mindspore/ops/operations/inner_ops.py +42 -53
  742. mindspore/ops/operations/linalg_ops.py +32 -29
  743. mindspore/ops/operations/math_ops.py +2076 -897
  744. mindspore/ops/operations/nn_ops.py +1282 -1252
  745. mindspore/ops/operations/other_ops.py +124 -278
  746. mindspore/ops/operations/random_ops.py +345 -178
  747. mindspore/ops/operations/rl_ops.py +8 -9
  748. mindspore/ops/operations/sparse_ops.py +502 -157
  749. mindspore/ops/operations/spectral_ops.py +107 -0
  750. mindspore/ops/primitive.py +192 -15
  751. mindspore/ops/vm_impl_registry.py +23 -2
  752. mindspore/parallel/__init__.py +6 -1
  753. mindspore/parallel/_auto_parallel_context.py +199 -92
  754. mindspore/parallel/_cell_wrapper.py +4 -2
  755. mindspore/parallel/_cost_model_context.py +3 -0
  756. mindspore/parallel/_dp_allreduce_fusion.py +2 -1
  757. mindspore/parallel/_offload_context.py +185 -0
  758. mindspore/parallel/_parallel_serialization.py +167 -28
  759. mindspore/parallel/_ps_context.py +9 -5
  760. mindspore/parallel/_recovery_context.py +1 -1
  761. mindspore/parallel/_tensor.py +9 -1
  762. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  763. mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
  764. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  765. mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
  766. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  767. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
  768. mindspore/parallel/_utils.py +47 -7
  769. mindspore/parallel/algo_parameter_config.py +5 -1
  770. mindspore/parallel/checkpoint_transform.py +329 -0
  771. mindspore/parallel/shard.py +229 -0
  772. mindspore/perf_msvcbuildinsights.dll +0 -0
  773. mindspore/pgodb140.dll +0 -0
  774. mindspore/pgort140.dll +0 -0
  775. mindspore/profiler/__init__.py +2 -1
  776. mindspore/profiler/common/util.py +4 -3
  777. mindspore/profiler/common/validator/validate_path.py +2 -2
  778. mindspore/profiler/envprofiling.py +249 -0
  779. mindspore/profiler/parser/aicpu_data_parser.py +38 -39
  780. mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
  781. mindspore/profiler/parser/base_timeline_generator.py +471 -0
  782. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
  783. mindspore/profiler/parser/framework_parser.py +42 -16
  784. mindspore/profiler/parser/hccl_parser.py +158 -158
  785. mindspore/profiler/parser/hwts_log_parser.py +7 -6
  786. mindspore/profiler/parser/integrator.py +18 -1579
  787. mindspore/profiler/parser/minddata_analyzer.py +8 -8
  788. mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
  789. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  790. mindspore/profiler/parser/optime_parser.py +17 -18
  791. mindspore/profiler/parser/profiler_info.py +108 -0
  792. mindspore/profiler/parser/step_trace_parser.py +1 -1
  793. mindspore/profiler/profiling.py +396 -194
  794. mindspore/rewrite/__init__.py +6 -2
  795. mindspore/rewrite/api/node.py +51 -110
  796. mindspore/rewrite/api/node_type.py +10 -6
  797. mindspore/rewrite/api/pattern_engine.py +51 -7
  798. mindspore/rewrite/api/scoped_value.py +64 -53
  799. mindspore/rewrite/api/symbol_tree.py +108 -61
  800. mindspore/rewrite/api/tree_node_helper.py +2 -3
  801. mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
  802. mindspore/rewrite/ast_helpers/__init__.py +6 -3
  803. mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
  804. mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
  805. mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
  806. mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
  807. mindspore/rewrite/ast_transformers/__init__.py +0 -1
  808. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
  809. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
  810. mindspore/rewrite/common/__init__.py +2 -0
  811. mindspore/rewrite/common/event.py +1 -1
  812. mindspore/rewrite/common/observable.py +1 -1
  813. mindspore/rewrite/common/observer.py +1 -1
  814. mindspore/rewrite/common/rewrite_elog.py +35 -0
  815. mindspore/rewrite/namer.py +2 -2
  816. mindspore/rewrite/namespace.py +14 -4
  817. mindspore/rewrite/node.py +161 -13
  818. mindspore/rewrite/parser.py +0 -1
  819. mindspore/rewrite/parser_register.py +0 -1
  820. mindspore/rewrite/parsers/arguments_parser.py +3 -2
  821. mindspore/rewrite/parsers/assign_parser.py +267 -67
  822. mindspore/rewrite/parsers/attribute_parser.py +56 -0
  823. mindspore/rewrite/parsers/class_def_parser.py +191 -108
  824. mindspore/rewrite/parsers/constant_parser.py +101 -0
  825. mindspore/rewrite/parsers/container_parser.py +88 -0
  826. mindspore/rewrite/parsers/for_parser.py +28 -15
  827. mindspore/rewrite/parsers/function_def_parser.py +21 -5
  828. mindspore/rewrite/parsers/if_parser.py +11 -28
  829. mindspore/rewrite/parsers/module_parser.py +9 -6
  830. mindspore/rewrite/parsers/return_parser.py +3 -2
  831. mindspore/rewrite/sparsify/__init__.py +0 -0
  832. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  833. mindspore/rewrite/sparsify/sparsify.py +109 -0
  834. mindspore/rewrite/sparsify/utils.py +173 -0
  835. mindspore/rewrite/symbol_tree.py +322 -109
  836. mindspore/rewrite/symbol_tree_builder.py +45 -8
  837. mindspore/rewrite/symbol_tree_dumper.py +0 -1
  838. mindspore/rewrite/topological_manager.py +1 -2
  839. mindspore/run_check/_check_version.py +209 -112
  840. mindspore/run_check/run_check.py +2 -1
  841. mindspore/tbbmalloc.dll +0 -0
  842. mindspore/tinyxml2.dll +0 -0
  843. mindspore/train/__init__.py +6 -4
  844. mindspore/train/_utils.py +28 -5
  845. mindspore/train/amp.py +321 -50
  846. mindspore/train/callback/__init__.py +3 -1
  847. mindspore/train/callback/_backup_and_restore.py +120 -0
  848. mindspore/train/callback/_callback.py +8 -8
  849. mindspore/train/callback/_checkpoint.py +12 -9
  850. mindspore/train/callback/_early_stop.py +13 -7
  851. mindspore/train/callback/_history.py +8 -8
  852. mindspore/train/callback/_lambda_callback.py +6 -6
  853. mindspore/train/callback/_landscape.py +36 -38
  854. mindspore/train/callback/_loss_monitor.py +12 -6
  855. mindspore/train/callback/_lr_scheduler_callback.py +2 -4
  856. mindspore/train/callback/_on_request_exit.py +212 -0
  857. mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
  858. mindspore/train/callback/_summary_collector.py +27 -19
  859. mindspore/train/callback/_time_monitor.py +13 -7
  860. mindspore/train/checkpoint_pb2.py +68 -8
  861. mindspore/train/data_sink.py +122 -33
  862. mindspore/train/dataset_helper.py +28 -87
  863. mindspore/train/loss_scale_manager.py +4 -7
  864. mindspore/{nn → train}/metrics/__init__.py +20 -20
  865. mindspore/{nn → train}/metrics/accuracy.py +12 -10
  866. mindspore/{nn → train}/metrics/auc.py +4 -4
  867. mindspore/{nn → train}/metrics/bleu_score.py +4 -4
  868. mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
  869. mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
  870. mindspore/{nn → train}/metrics/dice.py +6 -5
  871. mindspore/{nn → train}/metrics/error.py +7 -5
  872. mindspore/{nn → train}/metrics/fbeta.py +9 -7
  873. mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
  874. mindspore/{nn → train}/metrics/loss.py +4 -3
  875. mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
  876. mindspore/{nn → train}/metrics/metric.py +6 -5
  877. mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
  878. mindspore/{nn → train}/metrics/perplexity.py +5 -4
  879. mindspore/{nn → train}/metrics/precision.py +5 -4
  880. mindspore/{nn → train}/metrics/recall.py +5 -4
  881. mindspore/{nn → train}/metrics/roc.py +7 -6
  882. mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
  883. mindspore/{nn → train}/metrics/topk.py +7 -5
  884. mindspore/train/mind_ir_pb2.py +339 -32
  885. mindspore/train/model.py +113 -84
  886. mindspore/train/serialization.py +547 -167
  887. mindspore/train/summary/_summary_adapter.py +1 -1
  888. mindspore/train/summary/summary_record.py +43 -12
  889. mindspore/train/train_thor/convert_utils.py +7 -1
  890. mindspore/train/train_thor/dataset_helper.py +3 -3
  891. mindspore/train/train_thor/model_thor.py +0 -4
  892. mindspore/turbojpeg.dll +0 -0
  893. mindspore/vcmeta.dll +0 -0
  894. mindspore/vcruntime140.dll +0 -0
  895. mindspore/vcruntime140_1.dll +0 -0
  896. mindspore/version.py +1 -1
  897. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
  898. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
  899. mindspore/compression/common/constant.py +0 -124
  900. mindspore/compression/export/__init__.py +0 -19
  901. mindspore/compression/export/quant_export.py +0 -514
  902. mindspore/compression/quant/qat.py +0 -636
  903. mindspore/compression/quant/quant_utils.py +0 -462
  904. mindspore/compression/quant/quantizer.py +0 -68
  905. mindspore/libatomic-1.dll +0 -0
  906. mindspore/libgcc_s_seh-1.dll +0 -0
  907. mindspore/libgfortran-4.dll +0 -0
  908. mindspore/libgomp-1.dll +0 -0
  909. mindspore/libjpeg-62.dll +0 -0
  910. mindspore/libmindspore.dll +0 -0
  911. mindspore/libmindspore_common.dll +0 -0
  912. mindspore/libmindspore_core.dll +0 -0
  913. mindspore/libmindspore_glog.dll +0 -0
  914. mindspore/libnnacl.dll +0 -0
  915. mindspore/libopencv_core452.dll +0 -0
  916. mindspore/libopencv_imgcodecs452.dll +0 -0
  917. mindspore/libopencv_imgproc452.dll +0 -0
  918. mindspore/libquadmath-0.dll +0 -0
  919. mindspore/libsqlite3.dll +0 -0
  920. mindspore/libssp-0.dll +0 -0
  921. mindspore/libstdc++-6.dll +0 -0
  922. mindspore/libtinyxml2.dll +0 -0
  923. mindspore/libturbojpeg.dll +0 -0
  924. mindspore/libwinpthread-1.dll +0 -0
  925. mindspore/nn/layer/quant.py +0 -1868
  926. mindspore/nn/layer/rnn_utils.py +0 -90
  927. mindspore/nn/probability/dpn/__init__.py +0 -22
  928. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  929. mindspore/nn/probability/dpn/vae/cvae.py +0 -138
  930. mindspore/nn/probability/dpn/vae/vae.py +0 -122
  931. mindspore/nn/probability/infer/__init__.py +0 -22
  932. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  933. mindspore/nn/probability/infer/variational/svi.py +0 -84
  934. mindspore/nn/probability/toolbox/__init__.py +0 -22
  935. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  936. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
  937. mindspore/nn/probability/transforms/__init__.py +0 -22
  938. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  939. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  940. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  941. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  942. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  943. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  944. mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
  945. mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
  946. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
  947. mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
  948. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
  949. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
  950. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
  951. mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
  952. mindspore/ops/composite/array_ops.py +0 -210
  953. mindspore/ops/composite/clip_ops.py +0 -238
  954. mindspore/ops/composite/random_ops.py +0 -426
  955. mindspore/ops/composite/vmap_ops.py +0 -38
  956. mindspore/ops/operations/sponge_ops.py +0 -3531
  957. mindspore/ops/operations/sponge_update_ops.py +0 -2546
  958. mindspore/parallel/nn/__init__.py +0 -42
  959. mindspore/parallel/nn/loss.py +0 -22
  960. mindspore/parallel/nn/moe.py +0 -21
  961. mindspore/parallel/nn/op_parallel_config.py +0 -22
  962. mindspore/parallel/nn/transformer.py +0 -31
  963. mindspore/run_check/_check_deps_version.py +0 -84
  964. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  965. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  966. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,701 @@
1
+ # Copyright 2022 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+ """
16
+ Transformer Cells module, include TransformerEncoderLayer, TransformerDecoderLayer,
17
+ TransformerEncoder, TransformerDecoder, Transformer.
18
+ """
19
+ import copy
20
+ import math
21
+ from typing import Union, Optional
22
+ import mindspore
23
+ import mindspore.ops as ops
24
+ from mindspore.common.tensor import Tensor
25
+ from mindspore.common.parameter import Parameter
26
+ from mindspore.common.initializer import initializer, XavierNormal, XavierUniform, \
27
+ HeUniform, Uniform, _calculate_fan_in_and_fan_out
28
+ from mindspore.ops.function.nn_func import multi_head_attention_forward
29
+ from mindspore.nn.cell import Cell
30
+ from .basic import Dense, Dropout
31
+ from .activation import ReLU, GELU
32
+ from .normalization import LayerNorm
33
+ from .container import CellList
34
+
35
+ __all__ = ['MultiheadAttention', 'TransformerEncoderLayer', 'TransformerDecoderLayer',
36
+ 'TransformerEncoder', 'TransformerDecoder', 'Transformer']
37
+
38
+
39
class _Linear(Dense):
    """Internal fully-connected layer with torch.nn.Linear-style defaults.

    Wraps ``Dense`` so that the weight is initialized with He-uniform
    (negative-slope a = sqrt(5)) and the bias with a uniform distribution
    bounded by ``1 / sqrt(fan_in)``, where fan_in is derived from the
    ``(out_channels, in_channels)`` weight shape. No activation is fused.
    """

    def __init__(self, in_channels, out_channels, has_bias=True):
        # fan_in of the weight matrix determines the bias bound,
        # mirroring the default initialization of torch.nn.Linear.
        fan_in = _calculate_fan_in_and_fan_out((out_channels, in_channels))[0]
        bias_bound = 1 / math.sqrt(fan_in)
        super().__init__(
            in_channels,
            out_channels,
            weight_init=HeUniform(math.sqrt(5)),
            bias_init=Uniform(bias_bound),
            has_bias=has_bias,
            activation=None,
        )
45
+
46
+
47
+ class MultiheadAttention(Cell):
48
+ r"""
49
+ This is an implementation of multihead attention in the paper `Attention is all you need
50
+ <https://arxiv.org/pdf/1706.03762v5.pdf>`_. Given the query vector with source length, and the
51
+ key and value vector with target length, the attention will be performed as the following
52
+
53
+ .. math::
54
+ MultiHeadAttention(query, key, vector) = Concat(head_1, \dots, head_h)W^O
55
+
56
+ where :math:`head_i = Attention(QW_i^Q, KW_i^K, VW_i^V)`. The default is with a bias.
57
+
58
+ if query, key and value tensor is same, then it will be self attention.
59
+
60
+ Args:
61
+ embed_dim (int): Total dimension of MultiheadAttention.
62
+ num_heads (int): Number of attention heads. Note that `embed_dim` will be split
63
+ across `num_heads` (i.e. each head will have dimension `embed_dim // num_heads`).
64
+ dropout (float): Dropout probability of `attn_output_weights`. Default: ``0.0``.
65
+ has_bias (bool): Whether adds bias to input / output projection layers. Default: ``True``.
66
+ add_bias_kv (bool): Whether adds bias to the key and value sequences at axis=0. Default: ``False``.
67
+ add_zero_attn (bool): Whether adds a new batch of zeros to the key and value sequences at axis=1.
68
+ Default: ``False``.
69
+ kdim (int): Total number of features for keys. Default: ``None`` (`kdim=embed_dim`).
70
+ vdim (int): Total number of features for values. Default: ``None`` (`vdim=embed_dim`).
71
+ batch_first (bool): If ``True``, then the input and output shape are :math:`(batch, seq, feature)` ,
72
+ else :math:`(seq, batch, feature)` . Default: ``False``.
73
+
74
+ Inputs:
75
+ - **query** (Tensor): The query embeddings. If `query` is unbatched, the shape is :math:`(L, E_q)`,
76
+ otherwise the shape is :math:`(L, N, E_q)` when `batch_first=False` or :math:`(N, L, E_q)` when
77
+ `batch_first=True`, where :math:`L`is the target sequence length, :math:`N` is the batch size,
78
+ and :math:`E_q` is the query embedding dimension `embed_dim`. Queries are compared against
79
+ key-value pairs to produce the output. See "Attention Is All You Need" for more details.
80
+ - **key** (Tensor): The key embeddings. If `key` is unbatched, the shape is :math:`(S, E_k)`, otherwise
81
+ the shape is :math:`(S, N, E_k)` when `batch_first=False` or :math:`(N, S, E_k)` when
82
+ `batch_first=True`, where :math:`S` is the source sequence length, :math:`N` is the batch size,
83
+ and :math:`E_k` is the key embedding dimension `kdim`. See "Attention Is All You Need" for more details.
84
+ - **value** (Tensor): The value embeddings. If `value` is unbatched, the shape is :math:`(S, E_v)`,
85
+ otherwise the shape is :math:`(S, N, E_v)` when `batch_first=False` or :math:`(N, S, E_v)` when
86
+ `batch_first=True`, where :math:`S` is the source sequence length, :math:`N` is the batch size,
87
+ and :math:`E_v` is the value embedding dimension `vdim`. See "Attention Is All You Need" for more details.
88
+ - **key_padding_mask** (Tensor, optional): If specified, a mask of shape :math:`(N, S)` indicating which
89
+ elements within `key` to ignore for the purpose of attention (i.e. treat as "padding").
90
+ For unbatched `query`, shape should be :math:`(S)`. Binary and byte masks are supported.
91
+ For a binary mask, a ``True`` value indicates that the corresponding `key` value will be ignored for
92
+ the purpose of attention. For a float mask, it will be directly added to the corresponding `key` value.
93
+ - **need_weights** (bool): Whether returns `attn_output_weights` in addition to `attn_outputs`.
94
+ Default: ``True``.
95
+ - **attn_mask** (Tensor, optional): If specified, a 2D or 3D mask preventing attention to certain positions.
96
+ Must be of shape :math:`(L, S)` or :math:`(N\cdot\text{num\_heads}, L, S)`, where :math:`N` is the
97
+ batch size, :math:`L` is the target sequence length, and :math:`S` is the source sequence length.
98
+ A 2D mask will be broadcasted across the batch while a 3D mask allows for a different mask for each entry
99
+ in the batch. Binary, byte, and float masks are supported. For a binary mask, a ``True`` value indicates
100
+ that the corresponding position is not allowed to attend. For a byte mask, a non-zero value indicates that
101
+ the corresponding position is not allowed to attend. For a float mask, the mask values will be added to
102
+ the attention weight.
103
+ - **average_attn_weights** (bool): If true, indicates that the returned `attn_weights` should be averaged
104
+ across heads. Otherwise, `attn_weights` are provided separately per head. Note that this flag only
105
+ has an effect when `need_weights=True`. Default: ``True`` (i.e. average weights across heads)
106
+
107
+ Outputs:
108
+ Tuple, a tuple contains(`attn_output`, `attn_output_weights`)
109
+
110
+ - **attn_output** - Attention outputs. If input is unbatched, the output shape is :math:`(L, E)`, otherwise
111
+ the output shape is :math:`(L, N, E)` when `batch_first=False` or :math:`(N, L, E)` when
112
+ `batch_first=True`, where :math:`L` is the target sequence length, :math:`N` is the batch size,
113
+ and :math:`E` is the embedding dimension `embed_dim`.
114
+ - **attn_output_weights** - Only returned when `need_weights=True`. If `average_attn_weights=True`,
115
+ returns attention weights averaged across heads with shape :math:`(L, S)` when input is unbatched or
116
+ :math:`(N, L, S)` when input is batched, where :math:`N` is the batch size, :math:`L` is
117
+ the target sequence length, and :math:`S` is the source sequence length.
118
+ If `average_attn_weights=False`, returns attention weights per
119
+ head of shape :math:`(\text{num\_heads}, L, S)` when input is unbatched or
120
+ :math:`(N, \text{num\_heads}, L, S)` when input is batched.
121
+
122
+ Supported Platforms:
123
+ ``Ascend`` ``GPU`` ``CPU``
124
+
125
+ Examples:
126
+ >>> embed_dim, num_heads = 128, 8
127
+ >>> seq_length, batch_size = 10, 8
128
+ >>> query = Tensor(np.random.randn(seq_length, batch_size, embed_dim), mindspore.float32)
129
+ >>> key = Tensor(np.random.randn(seq_length, batch_size, embed_dim), mindspore.float32)
130
+ >>> value = Tensor(np.random.randn(seq_length, batch_size, embed_dim), mindspore.float32)
131
+ >>> multihead_attn = nn.MultiheadAttention(embed_dim, num_heads)
132
+ >>> attn_output, attn_output_weights = multihead_attn(query, key, value)
133
+ >>> print(attn_output.shape)
134
+ (10, 8, 128)
135
+ """
136
+
137
+ def __init__(self, embed_dim, num_heads, dropout=0., has_bias=True, add_bias_kv=False,
138
+ add_zero_attn=False, kdim=None, vdim=None, batch_first=False):
139
+ super().__init__()
140
+ self.embed_dim = embed_dim
141
+ self.kdim = kdim if kdim is not None else embed_dim
142
+ self.vdim = vdim if vdim is not None else embed_dim
143
+ self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim
144
+
145
+ self.num_heads = num_heads
146
+ self.dropout = dropout
147
+ self.batch_first = batch_first
148
+ self.head_dim = embed_dim // num_heads
149
+ if self.head_dim * num_heads != self.embed_dim:
150
+ raise ValueError("The init argument 'embed_dim' must be divisible by 'num_heads'.")
151
+
152
+ if not self._qkv_same_embed_dim:
153
+ self.q_proj_weight = Parameter(initializer(XavierUniform(), (embed_dim, embed_dim)), 'q_proj_weight')
154
+ self.k_proj_weight = Parameter(initializer(XavierUniform(), (embed_dim, self.kdim)), 'k_proj_weight')
155
+ self.v_proj_weight = Parameter(initializer(XavierUniform(), (embed_dim, self.vdim)), 'v_proj_weight')
156
+ self.in_proj_weight = None
157
+ else:
158
+ self.in_proj_weight = Parameter(initializer(XavierUniform(), (3 * embed_dim, embed_dim)), 'in_proj_weight')
159
+ self.q_proj_weight = None
160
+ self.k_proj_weight = None
161
+ self.v_proj_weight = None
162
+
163
+ if has_bias:
164
+ self.in_proj_bias = Parameter(initializer('zeros', (3 * embed_dim)), 'in_proj_bias')
165
+ else:
166
+ self.in_proj_bias = None
167
+ self.out_proj = _Linear(embed_dim, embed_dim, has_bias=has_bias)
168
+
169
+ if add_bias_kv:
170
+ self.bias_k = Parameter(initializer(XavierNormal(), (1, 1, embed_dim)), 'bias_k')
171
+ self.bias_v = Parameter(initializer(XavierNormal(), (1, 1, embed_dim)), 'bias_v')
172
+ else:
173
+ self.bias_k = self.bias_v = None
174
+
175
+ self.add_zero_attn = add_zero_attn
176
+ self.k_is_v = False
177
+ self.q_is_k = False
178
+
179
+ def __call__(self, *args, **kwargs):
180
+ query = kwargs.get('query', args[0])
181
+ key = kwargs.get('key', args[1])
182
+ value = kwargs.get('value', args[2])
183
+ self.k_is_v = key is value
184
+ self.q_is_k = query is key
185
+ return super().__call__(*args, **kwargs)
186
+
187
    def construct(self, query: Tensor, key: Tensor, value: Tensor, key_padding_mask: Optional[Tensor] = None,
                  need_weights: bool = True, attn_mask: Optional[Tensor] = None, average_attn_weights: bool = True):
        """Run multi-head attention over `query`, `key`, `value`.

        Returns ``(attn_output, attn_output_weights)`` when `need_weights` is
        True, otherwise the 1-tuple ``(attn_output,)``.
        """
        # A 3-D query is treated as batched input; 2-D as a single sequence.
        is_batched = query.ndim == 3
        if key_padding_mask is not None:
            _kpm_dtype = key_padding_mask.dtype
            # Only boolean (positions to ignore) or floating (additive) masks
            # are meaningful to the attention computation.
            if _kpm_dtype != mindspore.bool_ and not ops.is_floating_point(key_padding_mask):
                raise ValueError(
                    "only bool and floating types of key_padding_mask are supported")

        if self.batch_first and is_batched:
            # k_is_v and q_is_k preprocess in __call__ since Graph mode do not support `is`
            # Transpose (N, L, E) -> (L, N, E); aliased tensors are swapped only
            # once so the identity relations recorded in __call__ stay valid.
            if self.k_is_v:
                if self.q_is_k:
                    query = key = value = query.swapaxes(1, 0)
                else:
                    query, key = [x.swapaxes(1, 0) for x in (query, key)]
                    value = key
            else:
                query, key, value = [x.swapaxes(1, 0) for x in (query, key, value)]

        if not self._qkv_same_embed_dim:
            # kdim/vdim differ from embed_dim: use the separate q/k/v weights.
            attn_output, attn_output_weights = multi_head_attention_forward(
                query, key, value, self.embed_dim, self.num_heads,
                self.in_proj_weight, self.in_proj_bias,
                self.bias_k, self.bias_v, self.add_zero_attn,
                self.dropout, self.out_proj.weight, self.out_proj.bias,
                training=self.training,
                key_padding_mask=key_padding_mask,
                attn_mask=attn_mask, use_separate_proj_weight=True,
                q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight,
                v_proj_weight=self.v_proj_weight, average_attn_weights=average_attn_weights,
                k_is_v=self.k_is_v, q_is_k=self.q_is_k)
        else:
            # Same embedding dims everywhere: the fused in_proj_weight suffices.
            attn_output, attn_output_weights = multi_head_attention_forward(
                query, key, value, self.embed_dim, self.num_heads,
                self.in_proj_weight, self.in_proj_bias,
                self.bias_k, self.bias_v, self.add_zero_attn,
                self.dropout, self.out_proj.weight, self.out_proj.bias,
                training=self.training,
                key_padding_mask=key_padding_mask,
                attn_mask=attn_mask, average_attn_weights=average_attn_weights,
                k_is_v=self.k_is_v, q_is_k=self.q_is_k)

        if self.batch_first and is_batched:
            # Undo the earlier transpose so the caller gets (N, L, E) back.
            attn_output = attn_output.swapaxes(1, 0)
        if need_weights:
            return attn_output, attn_output_weights
        return (attn_output,)
235
+
236
+
237
class TransformerEncoderLayer(Cell):
    r"""
    Transformer Encoder Layer. This is an implementation of the single layer of the transformer
    encoder layer, including multihead attention and feedward layer.

    Args:
        d_model (int): The number of features in the input tensor.
        nhead (int): The number of heads in the MultiheadAttention modules.
        dim_feedforward (int): The dimension of the feedforward layer. Default: ``2048``.
        dropout (float): The dropout value. Default: ``0.1``.
        activation (Union[str, callable, Cell]): The activation function of the intermediate layer,
            can be a string (`"relu"` or `"gelu"`), Cell instance (`nn.ReLU()` or `nn.GELU()`) or
            a callable (`ops.relu` or `ops.gelu`). Default: ``"relu"``.
        layer_norm_eps (float): The epsilon value in LayerNorm modules. Default: ``1e-5``.
        batch_first (bool): If `batch_first = True`, then the shape of input and output tensors is
            :math:`(batch, seq, feature)` , otherwise the shape is :math:`(seq, batch, feature)` .
            Default: ``False``.
        norm_first (bool): If `norm_first = True`, layer norm is done prior to attention and feedforward
            operations, respectively. Default: ``False``.

    Inputs:
        - **src** (Tensor): the sequence to the encoder layer.
        - **src_mask** (Tensor, optional): the mask for the src sequence. Default: ``None``.
        - **src_key_padding_mask** (Tensor, optional): the mask for the src keys per batch.
          Default: ``None``.

    Outputs:
        Tensor.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
        >>> src = Tensor(np.random.rand(10, 32, 512), mindspore.float32)
        >>> out = encoder_layer(src)
        >>> # Alternatively, when batch_first=True:
        >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8, batch_first=True)
        >>> src = Tensor(np.random.rand(32, 10, 512), mindspore.float32)
        >>> out = encoder_layer(src)
        >>> print(out.shape)
        (32, 10, 512)
    """
    __constants__ = ['batch_first', 'norm_first']

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Cell, callable] = 'relu', layer_norm_eps: float = 1e-5,
                 batch_first: bool = False, norm_first: bool = False):
        super().__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first)
        # feedforward layer
        self.linear1 = _Linear(d_model, dim_feedforward)
        self.dropout = Dropout(p=dropout)
        self.linear2 = _Linear(dim_feedforward, d_model)

        self.norm_first = norm_first
        self.norm1 = LayerNorm((d_model,), epsilon=layer_norm_eps)
        self.norm2 = LayerNorm((d_model,), epsilon=layer_norm_eps)
        self.dropout1 = Dropout(p=dropout)
        self.dropout2 = Dropout(p=dropout)

        # Validate `activation`: it must be a supported string name, an
        # nn.ReLU/nn.GELU Cell instance, or the functional ops.relu/ops.gelu.
        # NOTE: the previous checks combined negated tests with `or`
        # (e.g. `not isinstance(a, ReLU) or not isinstance(a, GELU)`), which is
        # always true and therefore rejected every valid Cell or callable.
        if not isinstance(activation, str) and not isinstance(activation, Cell) \
                and not callable(activation):
            raise ValueError(f"The argument 'activation' must be str, callable or Cell instance,"
                             f" but get {activation}.")
        if isinstance(activation, Cell) and not isinstance(activation, (ReLU, GELU)):
            raise ValueError(f"The argument 'activation' must be nn.ReLU or nn.GELU instance,"
                             f" but get {activation}.")
        # Cell instances are callable too, so exclude them before this check.
        if not isinstance(activation, (str, Cell)) and callable(activation) \
                and activation not in (ops.relu, ops.gelu):
            raise ValueError(f"The argument 'activation' must be ops.relu or ops.gelu instance,"
                             f" but get {activation}.")
        # string inputs of activation
        if isinstance(activation, str):
            activation = _get_activation_fn(activation)
        self.activation = activation

    def construct(self, src: Tensor, src_mask: Optional[Tensor] = None,
                  src_key_padding_mask: Optional[Tensor] = None):
        if src_key_padding_mask is not None:
            _skpm_dtype = src_key_padding_mask.dtype
            # Only boolean or floating padding masks are supported downstream.
            if _skpm_dtype != mindspore.bool_ and not ops.is_floating_point(src_key_padding_mask):
                raise AssertionError(
                    "only bool and floating types of key_padding_mask are supported")

        x = src
        if self.norm_first:
            # Pre-LN: normalize before each sub-block, then residual-add.
            x = x + self._sa_block(self.norm1(x), src_mask, src_key_padding_mask)
            x = x + self._ff_block(self.norm2(x))
        else:
            # Post-LN: residual-add first, then normalize.
            x = self.norm1(x + self._sa_block(x, src_mask, src_key_padding_mask))
            x = self.norm2(x + self._ff_block(x))

        return x

    def _sa_block(self, x, attn_mask, key_padding_mask):
        # Self-attention sub-block; attention weights are discarded.
        x = self.self_attn(x, x, x,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           need_weights=False)[0]
        return self.dropout1(x)

    def _ff_block(self, x):
        # Position-wise feedforward sub-block.
        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.dropout2(x)
343
+
344
+
345
class TransformerDecoderLayer(Cell):
    r"""
    Transformer Decoder Layer. This is an implementation of the single layer of the transformer
    decoder layer, including self-attention, cross attention and feedward layer.

    Args:
        d_model (int): The number of expected features in the input tensor.
        nhead (int): The number of heads in the MultiheadAttention modules.
        dim_feedforward (int): The dimension of the feedforward layer. Default: ``2048``.
        dropout (float): The dropout value. Default: ``0.1``.
        activation (Union[str, callable, Cell]): The activation function of the intermediate layer,
            can be a string (`"relu"` or `"gelu"`), Cell instance (`nn.ReLU()` or `nn.GELU()`) or
            a callable (`ops.relu` or `ops.gelu`). Default: ``"relu"``
        layer_norm_eps (float): The epsilon value in LayerNorm modules. Default: ``1e-5``.
        batch_first (bool): If `batch_first = True`, then the shape of input and output tensors is
            :math:`(batch, seq, feature)` , otherwise the shape is :math:`(seq, batch, feature)`.
            Default: ``False``.
        norm_first (bool): If `norm_first = True`, layer norm is done prior to attention and feedforward
            operations, respectively. Default: ``False``.

    Inputs:
        - **tgt** (Tensor): The sequence to the decoder layer.
        - **memory** (Tensor): The sequence from the last layer of the encoder.
        - **tgt_mask** (Tensor, optional): The mask of the tgt sequence. Default: ``None``.
        - **memory_mask** (Tensor, optional): The mask of the memory sequence. Default: ``None``.
        - **tgt_key_padding_mask** (Tensor, optional): The mask of the tgt keys per batch.
          Default: ``None``.
        - **memory_key_padding_mask** (Tensor, optional): The mask of the memory keys per batch.
          Default: ``None``.

    Outputs:
        Tensor.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
        >>> memory = Tensor(np.random.rand(10, 32, 512), mindspore.float32)
        >>> tgt = Tensor(np.random.rand(20, 32, 512), mindspore.float32)
        >>> out = decoder_layer(tgt, memory)
        >>> # Alternatively, when `batch_first` is ``True``:
        >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8, batch_first=True)
        >>> memory = Tensor(np.random.rand(32, 10, 512), mindspore.float32)
        >>> tgt = Tensor(np.random.rand(32, 20, 512), mindspore.float32)
        >>> out = decoder_layer(tgt, memory)
        >>> print(out.shape)
        (32, 20, 512)
    """
    __constants__ = ['batch_first', 'norm_first']

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Cell, callable] = 'relu', layer_norm_eps: float = 1e-5,
                 batch_first: bool = False, norm_first: bool = False):
        super().__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first)
        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first)
        # feedforward layer
        self.linear1 = _Linear(d_model, dim_feedforward)
        self.dropout = Dropout(p=dropout)
        self.linear2 = _Linear(dim_feedforward, d_model)

        self.norm_first = norm_first
        self.norm1 = LayerNorm((d_model,), epsilon=layer_norm_eps)
        self.norm2 = LayerNorm((d_model,), epsilon=layer_norm_eps)
        self.norm3 = LayerNorm((d_model,), epsilon=layer_norm_eps)
        self.dropout1 = Dropout(p=dropout)
        self.dropout2 = Dropout(p=dropout)
        self.dropout3 = Dropout(p=dropout)

        # Validate `activation`: it must be a supported string name, an
        # nn.ReLU/nn.GELU Cell instance, or the functional ops.relu/ops.gelu.
        # NOTE: the previous checks combined negated tests with `or`
        # (e.g. `not isinstance(a, ReLU) or not isinstance(a, GELU)`), which is
        # always true and therefore rejected every valid Cell or callable.
        if not isinstance(activation, str) and not isinstance(activation, Cell) \
                and not callable(activation):
            raise ValueError(f"The argument 'activation' must be str, callable or Cell instance,"
                             f" but get {activation}.")
        if isinstance(activation, Cell) and not isinstance(activation, (ReLU, GELU)):
            raise ValueError(f"The argument 'activation' must be nn.ReLU or nn.GELU instance,"
                             f" but get {activation}.")
        # Cell instances are callable too, so exclude them before this check.
        if not isinstance(activation, (str, Cell)) and callable(activation) \
                and activation not in (ops.relu, ops.gelu):
            raise ValueError(f"The argument 'activation' must be ops.relu or ops.gelu instance,"
                             f" but get {activation}.")
        # string inputs of activation
        if isinstance(activation, str):
            activation = _get_activation_fn(activation)
        self.activation = activation

    def construct(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None,
                  memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None,
                  memory_key_padding_mask: Optional[Tensor] = None):
        x = tgt
        if self.norm_first:
            # Pre-LN: normalize before each sub-block, then residual-add.
            x = x + self._sa_block(self.norm1(x), tgt_mask, tgt_key_padding_mask)
            x = x + self._mha_block(self.norm2(x), memory, memory_mask, memory_key_padding_mask)
            x = x + self._ff_block(self.norm3(x))
        else:
            # Post-LN: residual-add first, then normalize.
            x = self.norm1(x + self._sa_block(x, tgt_mask, tgt_key_padding_mask))
            x = self.norm2(x + self._mha_block(x, memory, memory_mask, memory_key_padding_mask))
            x = self.norm3(x + self._ff_block(x))

        return x

    def _sa_block(self, x, attn_mask, key_padding_mask):
        # Decoder self-attention sub-block; attention weights are discarded.
        x = self.self_attn(x, x, x,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           need_weights=False)[0]
        return self.dropout1(x)

    def _mha_block(self, x, mem, attn_mask, key_padding_mask):
        # Cross-attention over the encoder memory.
        x = self.multihead_attn(x, mem, mem,
                                attn_mask=attn_mask,
                                key_padding_mask=key_padding_mask,
                                need_weights=False)[0]
        return self.dropout2(x)

    def _ff_block(self, x):
        # Position-wise feedforward sub-block.
        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
        return self.dropout3(x)
464
+
465
+
466
class TransformerEncoder(Cell):
    r"""
    Transformer Encoder module with multi-layer stacked of `TransformerEncoderLayer`, including multihead self
    attention and feedforward layer. Users can build the
    BERT(https://arxiv.org/abs/1810.04805) model with corresponding parameters.

    Args:
        encoder_layer (Cell): An instance of the TransformerEncoderLayer() class.
        num_layers (int): The number of encoder-layers in the encoder.
        norm (Cell, optional): The layer normalization module.

    Inputs:
        - **src** (Tensor): The sequence to the encoder.
        - **src_mask** (Tensor, optional): The mask of the src sequence. Default: ``None``.
        - **src_key_padding_mask** (Tensor, optional): the mask of the src keys per batch .
          Default: ``None``.

    Outputs:
        Tensor.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
        >>> transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
        >>> src = Tensor(np.random.rand(10, 32, 512), mindspore.float32)
        >>> out = transformer_encoder(src)
        >>> print(out.shape)
        (10, 32, 512)
    """
    __constants__ = ['norm']

    def __init__(self, encoder_layer, num_layers, norm=None):
        super().__init__()
        # Each stacked layer is an independent deep copy of `encoder_layer`.
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def construct(self, src: Tensor, src_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None):
        if src_key_padding_mask is not None:
            mask_dtype = src_key_padding_mask.dtype
            # Only boolean or floating padding masks are supported downstream.
            if mask_dtype != mindspore.bool_ and not ops.is_floating_point(src_key_padding_mask):
                raise AssertionError(
                    "only bool and floating types of key_padding_mask are supported")
        output = src
        # Feed the result of each encoder layer into the next one.
        for encoder_module in self.layers:
            output = encoder_module(output, src_mask=src_mask, src_key_padding_mask=src_key_padding_mask)

        if self.norm is None:
            return output
        return self.norm(output)
520
+
521
+
522
class TransformerDecoder(Cell):
    r"""
    Transformer Decoder module with multi-layer stacked of `TransformerDecoderLayer`, including multihead self
    attention, cross attention and feedforward layer.

    Args:
        decoder_layer (Cell): An instance of the :class:`mindspore.nn.TransformerDecoderLayer` class.
        num_layers (int): The number of decoder-layers in the decoder.
        norm (Cell, optional): The layer normalization module.

    Inputs:
        - **tgt** (Tensor): The sequence to the decoder.
        - **memory** (Tensor): The sequence from the last layer of the encoder.
        - **tgt_mask** (Tensor, optional): the mask of the tgt sequence. Default: ``None``.
        - **memory_mask** (Tensor, optional): the mask of the memory sequence. Default: ``None``.
        - **tgt_key_padding_mask** (Tensor, optional): the mask of the tgt keys per batch.
          Default: ``None``.
        - **memory_key_padding_mask** (Tensor, optional): the mask of the memory keys per batch.
          Default: ``None``.

    Outputs:
        Tensor.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
        >>> transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
        >>> memory = Tensor(np.random.rand(10, 32, 512), mindspore.float32)
        >>> tgt = Tensor(np.random.rand(20, 32, 512), mindspore.float32)
        >>> out = transformer_decoder(tgt, memory)
        >>> print(out.shape)
        (20, 32, 512)
    """
    __constants__ = ['norm']

    def __init__(self, decoder_layer, num_layers, norm=None):
        super().__init__()
        # Each stacked layer is an independent deep copy of `decoder_layer`.
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def construct(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None,
                  memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None,
                  memory_key_padding_mask: Optional[Tensor] = None):
        output = tgt
        # Feed the result of each decoder layer into the next one; every layer
        # also cross-attends to the shared encoder `memory`.
        for decoder_module in self.layers:
            output = decoder_module(output, memory, tgt_mask=tgt_mask,
                                    memory_mask=memory_mask,
                                    tgt_key_padding_mask=tgt_key_padding_mask,
                                    memory_key_padding_mask=memory_key_padding_mask)

        if self.norm is None:
            return output
        return self.norm(output)
580
+
581
+
582
class Transformer(Cell):
    r"""
    Transformer module including encoder and decoder. The difference with the original implements is the module use
    the residual addition before the layer normalization. And the default hidden act is `gelu`.
    The details can be found in `Attention is all you need <https://arxiv.org/pdf/1706.03762v5.pdf>`_.

    Args:
        d_model (int): The number of expected features in the inputs tensor. Default: ``512``.
        nhead (int): The number of heads in the MultiheadAttention modules. Default: ``8``.
        num_encoder_layers (int): The number of encoder-layers in the encoder. Default: ``6``.
        num_decoder_layers (int): The number of decoder-layers in the decoder. Default: ``6``.
        dim_feedforward (int): The dimension of the feedforward layer. Default: ``2048``.
        dropout (float): The dropout value. Default: ``0.1``.
        activation (Union[str, callable, Cell]): The activation function of the intermediate layer,
            can be a string (`"relu"` or `"gelu"`), Cell instance (`nn.ReLU()` or `nn.GELU()`) or
            a callable (`ops.relu` or `ops.gelu`). Default: ``"relu"``
        custom_encoder (Cell): Custom encoder. Default: ``None``.
        custom_decoder (Cell): Custom decoder. Default: ``None``.
        layer_norm_eps (float): the epsilion value in layer normalization module. Default: ``1e-5``.
        batch_first (bool): If `batch_first = True`, then the shape of input and output tensors is
            :math:`(batch, seq, feature)` , otherwise the shape is :math:`(seq, batch, feature)` .
            Default: ``False``.
        norm_first (bool): If `norm_first = True`, layer norm is done prior to attention and feedforward
            operations, respectively. Default: ``False``.

    Inputs:
        - **src** (Tensor): The source sequence to the encoder.
        - **tgt** (Tensor): The target sequence to the decoder.
        - **src_mask** (Tensor, optional): The mask of the src sequence. Default: ``None``.
        - **tgt_mask** (Tensor, optional): The mask of the tgt sequence. Default: ``None``.
        - **memory_mask** (Tensor, optional): The additive mask of the encoder output.
          Default: ``None``.
        - **src_key_padding_mask** (Tensor, optional): The mask of src keys per batch.
          Default: ``None``.
        - **tgt_key_padding_mask** (Tensor, optional): The mask of tgt keys per batch.
          Default: ``None``.
        - **memory_key_padding_mask** (Tensor, optional): The mask of memory keys per batch.
          Default: ``None``.

    Outputs:
        Tensor.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
        >>> src = Tensor(np.random.rand(10, 32, 512), mindspore.float32)
        >>> tgt = Tensor(np.random.rand(20, 32, 512), mindspore.float32)
        >>> out = transformer_model(src, tgt)
        >>> print(out.shape)
        (20, 32, 512)
    """

    def __init__(self, d_model: int = 512, nhead: int = 8, num_encoder_layers: int = 6,
                 num_decoder_layers: int = 6, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Cell, callable] = 'relu', custom_encoder: Optional[Cell] = None,
                 custom_decoder: Optional[Cell] = None, layer_norm_eps: float = 1e-5,
                 batch_first: bool = False, norm_first: bool = False):
        super(Transformer, self).__init__()

        # Use the caller-supplied encoder when given; otherwise build a
        # standard stack of TransformerEncoderLayer with a final LayerNorm.
        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout,
                                                    activation, layer_norm_eps, batch_first, norm_first)
            encoder_norm = LayerNorm((d_model,), epsilon=layer_norm_eps)
            self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout,
                                                    activation, layer_norm_eps, batch_first, norm_first)
            decoder_norm = LayerNorm((d_model,), epsilon=layer_norm_eps)
            self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)

        # Re-initialize every weight matrix (ndim > 1) with Xavier uniform.
        for _, p in self.parameters_and_names():
            if p.ndim > 1:
                p.set_data(initializer('xavier_uniform', p.shape, p.dtype))

        self.d_model = d_model
        self.nhead = nhead

        self.batch_first = batch_first

    def construct(self, src: Tensor, tgt: Tensor, src_mask: Optional[Tensor] = None, tgt_mask: Optional[Tensor] = None,
                  memory_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None,
                  tgt_key_padding_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None):
        is_batched = src.ndim == 3
        # The batch axis is 0 when batch_first, otherwise 1.
        # NOTE: the previous code read tgt_batch_size from `src`, so the
        # consistency check below could never fire.
        if self.batch_first:
            src_batch_size = src.shape[0]
            tgt_batch_size = tgt.shape[0]
        else:
            src_batch_size = src.shape[1]
            tgt_batch_size = tgt.shape[1]
        if src_batch_size != tgt_batch_size and is_batched:
            raise ValueError("The number of batch size for 'src' and 'tgt' must be equal.")

        if src.shape[-1] != self.d_model or tgt.shape[-1] != self.d_model:
            raise ValueError("The number of features for 'src' and 'tgt' must be equal to `d_model`.")

        memory = self.encoder(src, src_mask=src_mask, src_key_padding_mask=src_key_padding_mask)
        output = self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask,
                              tgt_key_padding_mask=tgt_key_padding_mask,
                              memory_key_padding_mask=memory_key_padding_mask)
        return output
689
+
690
+
691
+ def _get_activation_fn(activation: str):
692
+ if activation == "relu":
693
+ return ops.relu
694
+ if activation == "gelu":
695
+ return ops.gelu
696
+
697
+ raise ValueError(f"The activation must be relu/gelu, but get {activation}")
698
+
699
+
700
def _get_clones(module, N):
    """Return a CellList of N independent deep copies of `module`."""
    cloned = [copy.deepcopy(module) for _ in range(N)]
    return CellList(cloned)