mindspore 2.0.0rc1__cp38-cp38-manylinux1_x86_64.whl → 2.2.0__cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (884)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +2 -2
  3. mindspore/__init__.py +5 -2
  4. mindspore/_akg/akg/build_module.py +5 -6
  5. mindspore/_akg/akg/composite/build_module.py +49 -16
  6. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  7. mindspore/_akg/akg/config/repository.json +195 -0
  8. mindspore/_akg/akg/global_configs.py +5 -1
  9. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  10. mindspore/_akg/akg/tvm/api.py +4 -3
  11. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  12. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  13. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  14. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  15. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  16. mindspore/_akg/akg/tvm/build_module.py +16 -1
  17. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  18. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  19. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  20. mindspore/_akg/akg/tvm/module.py +1 -2
  21. mindspore/_akg/akg/tvm/stmt.py +2 -2
  22. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  23. mindspore/_akg/akg/utils/kernel_exec.py +58 -260
  24. mindspore/_akg/akg/utils/op_dsl.py +17 -1
  25. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  26. mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
  27. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  28. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  29. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  30. mindspore/_check_jit_forbidden_api.py +5 -1
  31. mindspore/_checkparam.py +79 -62
  32. mindspore/_extends/graph_kernel/__init__.py +0 -1
  33. mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
  34. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  35. mindspore/_extends/graph_kernel/splitter.py +1 -9
  36. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
  37. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
  38. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  39. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
  40. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
  41. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  42. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  43. mindspore/_extends/parse/__init__.py +19 -17
  44. mindspore/_extends/parse/namespace.py +7 -36
  45. mindspore/_extends/parse/parser.py +375 -189
  46. mindspore/_extends/parse/resources.py +36 -41
  47. mindspore/_extends/parse/standard_method.py +350 -245
  48. mindspore/_extends/parse/trope.py +2 -12
  49. mindspore/_extends/remote/kernel_build_server.py +24 -7
  50. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  51. mindspore/_install_custom.py +43 -0
  52. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  53. mindspore/amp.py +85 -19
  54. mindspore/bin/cache_admin +0 -0
  55. mindspore/bin/cache_server +0 -0
  56. mindspore/boost/base.py +2 -2
  57. mindspore/boost/boost.py +27 -32
  58. mindspore/boost/boost_cell_wrapper.py +37 -13
  59. mindspore/boost/grad_accumulation.py +1 -1
  60. mindspore/boost/grad_freeze.py +34 -6
  61. mindspore/boost/group_loss_scale_manager.py +15 -14
  62. mindspore/boost/less_batch_normalization.py +28 -3
  63. mindspore/common/__init__.py +15 -11
  64. mindspore/common/_auto_dynamic.py +68 -0
  65. mindspore/common/_jit_fallback_utils.py +111 -0
  66. mindspore/common/_register_for_adapter.py +17 -5
  67. mindspore/common/_register_for_tensor.py +2 -2
  68. mindspore/common/_stub_tensor.py +18 -15
  69. mindspore/common/_utils.py +31 -7
  70. mindspore/common/api.py +269 -101
  71. mindspore/common/auto_dynamic_shape.py +498 -0
  72. mindspore/common/dtype.py +61 -21
  73. mindspore/common/dump.py +9 -7
  74. mindspore/common/initializer.py +106 -76
  75. mindspore/common/jit_config.py +35 -14
  76. mindspore/common/lazy_inline.py +187 -0
  77. mindspore/common/mindir_util.py +101 -0
  78. mindspore/common/mutable.py +10 -13
  79. mindspore/common/parameter.py +246 -55
  80. mindspore/common/seed.py +13 -7
  81. mindspore/common/sparse_tensor.py +29 -33
  82. mindspore/common/tensor.py +907 -251
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +84 -4
  85. mindspore/communication/management.py +160 -88
  86. mindspore/config/op_info.config +99 -75
  87. mindspore/config/super_bar_config.json +36 -4
  88. mindspore/context.py +526 -219
  89. mindspore/dataset/__init__.py +9 -46
  90. mindspore/dataset/audio/__init__.py +4 -19
  91. mindspore/dataset/audio/transforms.py +545 -233
  92. mindspore/dataset/audio/utils.py +21 -18
  93. mindspore/dataset/callback/ds_callback.py +42 -13
  94. mindspore/dataset/core/config.py +158 -100
  95. mindspore/dataset/core/validator_helpers.py +1 -63
  96. mindspore/dataset/debug/debug_hook.py +45 -13
  97. mindspore/dataset/debug/pre_defined_hook.py +5 -5
  98. mindspore/dataset/engine/__init__.py +0 -5
  99. mindspore/dataset/engine/cache_client.py +38 -15
  100. mindspore/dataset/engine/datasets.py +615 -278
  101. mindspore/dataset/engine/datasets_audio.py +154 -283
  102. mindspore/dataset/engine/datasets_standard_format.py +104 -116
  103. mindspore/dataset/engine/datasets_text.py +443 -326
  104. mindspore/dataset/engine/datasets_user_defined.py +251 -164
  105. mindspore/dataset/engine/datasets_vision.py +839 -1443
  106. mindspore/dataset/engine/iterators.py +11 -4
  107. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
  108. mindspore/dataset/engine/obs/util.py +3 -0
  109. mindspore/dataset/engine/offload.py +6 -6
  110. mindspore/dataset/engine/queue.py +15 -14
  111. mindspore/dataset/engine/samplers.py +39 -23
  112. mindspore/dataset/engine/serializer_deserializer.py +22 -6
  113. mindspore/dataset/engine/validators.py +21 -331
  114. mindspore/dataset/text/__init__.py +5 -33
  115. mindspore/dataset/text/transforms.py +334 -165
  116. mindspore/dataset/text/utils.py +215 -145
  117. mindspore/dataset/transforms/__init__.py +1 -1
  118. mindspore/dataset/transforms/c_transforms.py +3 -2
  119. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  120. mindspore/dataset/transforms/transforms.py +174 -71
  121. mindspore/dataset/utils/browse_dataset.py +25 -17
  122. mindspore/dataset/utils/line_reader.py +24 -21
  123. mindspore/dataset/vision/__init__.py +5 -26
  124. mindspore/dataset/vision/c_transforms.py +177 -165
  125. mindspore/dataset/vision/py_transforms.py +114 -119
  126. mindspore/dataset/vision/py_transforms_util.py +54 -51
  127. mindspore/dataset/vision/transforms.py +1127 -381
  128. mindspore/dataset/vision/utils.py +54 -38
  129. mindspore/dataset/vision/validators.py +12 -2
  130. mindspore/experimental/map_parameter.py +38 -4
  131. mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
  132. mindspore/experimental/optim/adam.py +192 -0
  133. mindspore/experimental/optim/adamw.py +181 -0
  134. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  135. mindspore/experimental/optim/optimizer.py +252 -0
  136. mindspore/experimental/optim/sgd.py +147 -0
  137. mindspore/gen_ops.py +273 -0
  138. mindspore/include/OWNERS +1 -2
  139. mindspore/include/api/context.h +21 -1
  140. mindspore/include/api/data_type.h +2 -1
  141. mindspore/include/api/graph.h +0 -15
  142. mindspore/include/api/kernel.h +2 -0
  143. mindspore/include/api/kernel_api.h +37 -12
  144. mindspore/include/api/model.h +29 -42
  145. mindspore/include/api/model_group.h +14 -3
  146. mindspore/include/api/model_parallel_runner.h +18 -2
  147. mindspore/include/api/serialization.h +26 -0
  148. mindspore/include/api/status.h +1 -0
  149. mindspore/include/api/types.h +38 -4
  150. mindspore/include/c_api/ms/abstract.h +67 -0
  151. mindspore/include/c_api/ms/attribute.h +197 -0
  152. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  153. mindspore/include/c_api/ms/base/macros.h +32 -0
  154. mindspore/include/c_api/ms/base/status.h +33 -0
  155. mindspore/include/c_api/ms/base/types.h +282 -0
  156. mindspore/include/c_api/ms/context.h +102 -0
  157. mindspore/include/c_api/ms/graph.h +160 -0
  158. mindspore/include/c_api/ms/node.h +606 -0
  159. mindspore/include/c_api/ms/tensor.h +161 -0
  160. mindspore/include/c_api/ms/value.h +84 -0
  161. mindspore/include/c_api/status_c.h +3 -0
  162. mindspore/include/dataset/constants.h +6 -12
  163. mindspore/include/dataset/execute.h +23 -13
  164. mindspore/include/dataset/text.h +26 -26
  165. mindspore/include/dataset/transforms.h +25 -31
  166. mindspore/include/dataset/vision.h +60 -60
  167. mindspore/include/dataset/vision_ascend.h +5 -6
  168. mindspore/include/dataset/vision_lite.h +17 -17
  169. mindspore/include/mindapi/base/format.h +0 -1
  170. mindspore/include/mindapi/base/type_id.h +2 -1
  171. mindspore/include/mindapi/base/types.h +5 -1
  172. mindspore/lib/libdnnl.so.2 +0 -0
  173. mindspore/lib/libjemalloc.so.2 +0 -0
  174. mindspore/lib/libmindspore.so +0 -0
  175. mindspore/lib/libmindspore_backend.so +0 -0
  176. mindspore/lib/libmindspore_common.so +0 -0
  177. mindspore/lib/libmindspore_core.so +0 -0
  178. mindspore/lib/libmindspore_glog.so.0 +0 -0
  179. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  180. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  181. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  182. mindspore/lib/libmindspore_shared_lib.so +0 -0
  183. mindspore/lib/libmpi_adapter.so +0 -0
  184. mindspore/lib/libnnacl.so +0 -0
  185. mindspore/lib/libopencv_core.so.4.5 +0 -0
  186. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  187. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  188. mindspore/lib/libps_cache.so +0 -0
  189. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  190. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  191. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
  192. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  193. mindspore/lib/plugin/ascend/libakg.so +0 -0
  194. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  195. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  196. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  197. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  198. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  199. mindspore/lib/plugin/cpu/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  201. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  202. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  203. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  204. mindspore/lib/plugin/gpu10.1/libnvidia_collective.so +0 -0
  205. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  206. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  207. mindspore/lib/plugin/gpu11.1/libnvidia_collective.so +0 -0
  208. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  209. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  210. mindspore/lib/plugin/gpu11.6/libnvidia_collective.so +0 -0
  211. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  212. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  213. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  214. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  215. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  216. mindspore/log.py +9 -6
  217. mindspore/mindrecord/filereader.py +33 -4
  218. mindspore/mindrecord/filewriter.py +70 -35
  219. mindspore/mindrecord/mindpage.py +40 -34
  220. mindspore/mindrecord/shardreader.py +1 -1
  221. mindspore/mindrecord/shardsegment.py +1 -1
  222. mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
  223. mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
  224. mindspore/mindrecord/tools/csv_to_mr.py +29 -13
  225. mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
  226. mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
  227. mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
  228. mindspore/nn/cell.py +463 -169
  229. mindspore/nn/dynamic_lr.py +47 -43
  230. mindspore/nn/layer/activation.py +225 -82
  231. mindspore/nn/layer/basic.py +121 -79
  232. mindspore/nn/layer/channel_shuffle.py +21 -21
  233. mindspore/nn/layer/combined.py +33 -26
  234. mindspore/nn/layer/container.py +277 -22
  235. mindspore/nn/layer/conv.py +441 -304
  236. mindspore/nn/layer/dense.py +19 -13
  237. mindspore/nn/layer/embedding.py +62 -49
  238. mindspore/nn/layer/flash_attention.py +264 -0
  239. mindspore/nn/layer/image.py +50 -39
  240. mindspore/nn/layer/math.py +62 -51
  241. mindspore/nn/layer/normalization.py +219 -167
  242. mindspore/nn/layer/padding.py +58 -70
  243. mindspore/nn/layer/pooling.py +334 -287
  244. mindspore/nn/layer/rnn_cells.py +53 -38
  245. mindspore/nn/layer/rnns.py +59 -56
  246. mindspore/nn/layer/thor_layer.py +52 -44
  247. mindspore/nn/layer/timedistributed.py +6 -4
  248. mindspore/nn/layer/transformer.py +284 -164
  249. mindspore/nn/learning_rate_schedule.py +34 -25
  250. mindspore/nn/loss/__init__.py +3 -2
  251. mindspore/nn/loss/loss.py +554 -311
  252. mindspore/nn/optim/ada_grad.py +12 -9
  253. mindspore/nn/optim/adadelta.py +14 -11
  254. mindspore/nn/optim/adafactor.py +19 -16
  255. mindspore/nn/optim/adam.py +62 -47
  256. mindspore/nn/optim/adamax.py +13 -10
  257. mindspore/nn/optim/adasum.py +12 -8
  258. mindspore/nn/optim/asgd.py +10 -9
  259. mindspore/nn/optim/ftrl.py +20 -17
  260. mindspore/nn/optim/lamb.py +16 -12
  261. mindspore/nn/optim/lars.py +8 -6
  262. mindspore/nn/optim/lazyadam.py +25 -20
  263. mindspore/nn/optim/momentum.py +10 -7
  264. mindspore/nn/optim/optimizer.py +61 -9
  265. mindspore/nn/optim/proximal_ada_grad.py +14 -13
  266. mindspore/nn/optim/rmsprop.py +17 -13
  267. mindspore/nn/optim/rprop.py +30 -17
  268. mindspore/nn/optim/sgd.py +40 -23
  269. mindspore/nn/optim/thor.py +24 -26
  270. mindspore/nn/probability/bijector/bijector.py +11 -11
  271. mindspore/nn/probability/bijector/exp.py +1 -1
  272. mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
  273. mindspore/nn/probability/bijector/invert.py +1 -1
  274. mindspore/nn/probability/bijector/power_transform.py +29 -29
  275. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  276. mindspore/nn/probability/bijector/softplus.py +5 -5
  277. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
  278. mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
  279. mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
  280. mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
  281. mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
  282. mindspore/nn/probability/distribution/_utils/utils.py +1 -1
  283. mindspore/nn/probability/distribution/bernoulli.py +9 -9
  284. mindspore/nn/probability/distribution/beta.py +8 -8
  285. mindspore/nn/probability/distribution/categorical.py +23 -15
  286. mindspore/nn/probability/distribution/cauchy.py +5 -6
  287. mindspore/nn/probability/distribution/distribution.py +3 -3
  288. mindspore/nn/probability/distribution/exponential.py +4 -4
  289. mindspore/nn/probability/distribution/gamma.py +10 -10
  290. mindspore/nn/probability/distribution/geometric.py +8 -8
  291. mindspore/nn/probability/distribution/gumbel.py +8 -9
  292. mindspore/nn/probability/distribution/half_normal.py +5 -5
  293. mindspore/nn/probability/distribution/laplace.py +5 -5
  294. mindspore/nn/probability/distribution/log_normal.py +12 -11
  295. mindspore/nn/probability/distribution/logistic.py +8 -8
  296. mindspore/nn/probability/distribution/normal.py +6 -5
  297. mindspore/nn/probability/distribution/poisson.py +10 -11
  298. mindspore/nn/probability/distribution/student_t.py +8 -9
  299. mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
  300. mindspore/nn/probability/distribution/uniform.py +11 -11
  301. mindspore/nn/reinforcement/tensor_array.py +2 -2
  302. mindspore/nn/sparse/sparse.py +9 -9
  303. mindspore/nn/wrap/cell_wrapper.py +188 -63
  304. mindspore/nn/wrap/grad_reducer.py +21 -12
  305. mindspore/nn/wrap/loss_scale.py +136 -49
  306. mindspore/numpy/__init__.py +4 -4
  307. mindspore/numpy/array_creations.py +55 -56
  308. mindspore/numpy/array_ops.py +134 -35
  309. mindspore/numpy/logic_ops.py +66 -20
  310. mindspore/numpy/math_ops.py +142 -139
  311. mindspore/numpy/utils_const.py +2 -2
  312. mindspore/offline_debug/convert_async.py +2 -2
  313. mindspore/ops/_grad_experimental/__init__.py +7 -5
  314. mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
  315. mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
  316. mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
  317. mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
  318. mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
  319. mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
  320. mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
  321. mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
  322. mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
  323. mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
  324. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
  325. mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
  326. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  327. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  328. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
  329. mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
  330. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
  331. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
  332. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
  333. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
  334. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  335. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
  336. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
  337. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
  338. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  339. mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
  340. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  341. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  342. mindspore/ops/_op_impl/aicpu/cast.py +52 -0
  343. mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
  344. mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
  345. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  346. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
  347. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  348. mindspore/ops/_op_impl/aicpu/eye.py +4 -4
  349. mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
  350. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
  351. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  352. mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
  353. mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
  354. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  355. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  356. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  357. mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
  358. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
  359. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  360. mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
  361. mindspore/ops/_op_impl/aicpu/median.py +1 -0
  362. mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
  363. mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
  364. mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
  365. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
  366. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  367. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  368. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  369. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  370. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  371. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
  372. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
  373. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
  374. mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
  375. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  376. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  377. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  378. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  379. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
  380. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
  381. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  382. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  383. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  384. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  385. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  386. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
  387. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
  388. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
  389. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
  390. mindspore/ops/_op_impl/tbe/__init__.py +6 -4
  391. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  392. mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
  393. mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
  394. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
  395. mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
  396. mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
  397. mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
  398. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  399. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
  400. mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
  401. mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
  402. mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
  403. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
  404. mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
  405. mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
  406. mindspore/ops/_op_impl/tbe/im2col.py +4 -4
  407. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  408. mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
  409. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
  410. mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
  411. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  412. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
  413. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  414. mindspore/ops/_primitive_cache.py +1 -1
  415. mindspore/ops/_tracefunc.py +241 -0
  416. mindspore/ops/_utils/utils.py +10 -2
  417. mindspore/ops/_vmap/vmap_array_ops.py +5 -3
  418. mindspore/ops/_vmap/vmap_base.py +5 -4
  419. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  420. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  421. mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
  422. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  423. mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
  424. mindspore/ops/arg_dtype_cast.py +54 -0
  425. mindspore/ops/composite/__init__.py +7 -5
  426. mindspore/ops/composite/base.py +78 -34
  427. mindspore/ops/composite/math_ops.py +5 -695
  428. mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
  429. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
  430. mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
  431. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  432. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  433. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
  434. mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
  435. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
  436. mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
  437. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
  438. mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
  439. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
  440. mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
  441. mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
  442. mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
  443. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
  444. mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
  445. mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
  446. mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
  447. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  448. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  449. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
  450. mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
  451. mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
  452. mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
  453. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  454. mindspore/ops/deprecated.py +304 -0
  455. mindspore/ops/function/__init__.py +41 -4
  456. mindspore/ops/function/array_func.py +1108 -467
  457. mindspore/ops/function/clip_func.py +94 -27
  458. mindspore/ops/function/debug_func.py +3 -1
  459. mindspore/ops/function/grad/grad_func.py +82 -73
  460. mindspore/ops/function/image_func.py +28 -12
  461. mindspore/ops/function/linalg_func.py +135 -39
  462. mindspore/ops/function/math_func.py +3779 -894
  463. mindspore/ops/function/nn_func.py +1584 -657
  464. mindspore/ops/function/parameter_func.py +13 -3
  465. mindspore/ops/function/random_func.py +247 -153
  466. mindspore/ops/function/sparse_func.py +14 -11
  467. mindspore/ops/function/sparse_unary_func.py +173 -47
  468. mindspore/ops/function/spectral_func.py +8 -4
  469. mindspore/ops/function/vmap_func.py +8 -7
  470. mindspore/ops/functional.py +47 -16
  471. mindspore/ops/op_info_register.py +346 -86
  472. mindspore/ops/operations/__init__.py +38 -22
  473. mindspore/ops/operations/_grad_ops.py +145 -149
  474. mindspore/ops/operations/_inner_ops.py +298 -56
  475. mindspore/ops/operations/_ms_kernel.py +3 -3
  476. mindspore/ops/operations/_quant_ops.py +24 -28
  477. mindspore/ops/operations/_rl_inner_ops.py +9 -7
  478. mindspore/ops/operations/_scalar_ops.py +115 -0
  479. mindspore/ops/operations/_sequence_ops.py +148 -10
  480. mindspore/ops/operations/_tensor_array.py +1 -1
  481. mindspore/ops/operations/_thor_ops.py +2 -2
  482. mindspore/ops/operations/array_ops.py +1239 -561
  483. mindspore/ops/operations/comm_ops.py +166 -90
  484. mindspore/ops/operations/control_ops.py +3 -3
  485. mindspore/ops/operations/custom_ops.py +124 -102
  486. mindspore/ops/operations/debug_ops.py +24 -11
  487. mindspore/ops/operations/image_ops.py +86 -71
  488. mindspore/ops/operations/inner_ops.py +18 -13
  489. mindspore/ops/operations/linalg_ops.py +30 -11
  490. mindspore/ops/operations/math_ops.py +1730 -435
  491. mindspore/ops/operations/nn_ops.py +1953 -943
  492. mindspore/ops/operations/other_ops.py +65 -43
  493. mindspore/ops/operations/random_ops.py +258 -98
  494. mindspore/ops/operations/rl_ops.py +4 -36
  495. mindspore/ops/operations/sparse_ops.py +38 -33
  496. mindspore/ops/operations/spectral_ops.py +8 -4
  497. mindspore/ops/primitive.py +66 -44
  498. mindspore/ops/signature.py +5 -5
  499. mindspore/parallel/_auto_parallel_context.py +80 -19
  500. mindspore/parallel/_cost_model_context.py +42 -0
  501. mindspore/parallel/_offload_context.py +162 -72
  502. mindspore/parallel/_parallel_serialization.py +2 -2
  503. mindspore/parallel/_ps_context.py +16 -4
  504. mindspore/parallel/_recovery_context.py +2 -1
  505. mindspore/parallel/_tensor.py +15 -13
  506. mindspore/parallel/_transformer/layers.py +8 -6
  507. mindspore/parallel/_transformer/loss.py +1 -0
  508. mindspore/parallel/_transformer/moe.py +7 -7
  509. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  510. mindspore/parallel/_transformer/transformer.py +34 -14
  511. mindspore/parallel/_utils.py +36 -14
  512. mindspore/parallel/algo_parameter_config.py +114 -20
  513. mindspore/parallel/checkpoint_transform.py +16 -18
  514. mindspore/parallel/shard.py +16 -13
  515. mindspore/profiler/__init__.py +1 -1
  516. mindspore/profiler/common/struct_type.py +3 -3
  517. mindspore/profiler/common/util.py +3 -2
  518. mindspore/profiler/envprofiling.py +11 -4
  519. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  520. mindspore/profiler/parser/ascend_flops_generator.py +94 -0
  521. mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
  522. mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
  523. mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
  524. mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
  525. mindspore/profiler/parser/ascend_op_generator.py +276 -0
  526. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  527. mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
  528. mindspore/profiler/parser/base_timeline_generator.py +11 -7
  529. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
  530. mindspore/profiler/parser/flops_parser.py +15 -11
  531. mindspore/profiler/parser/framework_parser.py +92 -73
  532. mindspore/profiler/parser/hccl_parser.py +16 -12
  533. mindspore/profiler/parser/integrator.py +22 -11
  534. mindspore/profiler/parser/memory_usage_parser.py +36 -11
  535. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  536. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  537. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  538. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  539. mindspore/profiler/parser/optime_parser.py +1 -1
  540. mindspore/profiler/parser/profiler_info.py +4 -5
  541. mindspore/profiler/parser/step_trace_parser.py +11 -14
  542. mindspore/profiler/profiling.py +678 -377
  543. mindspore/rewrite/api/node.py +211 -54
  544. mindspore/rewrite/api/node_type.py +5 -0
  545. mindspore/rewrite/api/pattern_engine.py +22 -23
  546. mindspore/rewrite/api/scoped_value.py +20 -17
  547. mindspore/rewrite/api/symbol_tree.py +252 -106
  548. mindspore/rewrite/api/tree_node_helper.py +3 -0
  549. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  550. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  551. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  552. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
  553. mindspore/rewrite/common/rewrite_elog.py +5 -1
  554. mindspore/rewrite/namer.py +51 -51
  555. mindspore/rewrite/namespace.py +14 -5
  556. mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
  557. mindspore/rewrite/node/call_function.py +79 -0
  558. mindspore/rewrite/node/cell_container.py +135 -0
  559. mindspore/rewrite/node/control_flow.py +88 -0
  560. mindspore/rewrite/{node.py → node/node.py} +313 -247
  561. mindspore/rewrite/node/node_manager.py +254 -0
  562. mindspore/rewrite/node/node_topological_manager.py +243 -0
  563. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  564. mindspore/rewrite/parsers/assign_parser.py +225 -239
  565. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  566. mindspore/rewrite/parsers/class_def_parser.py +179 -218
  567. mindspore/rewrite/parsers/constant_parser.py +9 -6
  568. mindspore/rewrite/parsers/container_parser.py +9 -7
  569. mindspore/rewrite/parsers/for_parser.py +36 -15
  570. mindspore/rewrite/parsers/function_def_parser.py +23 -20
  571. mindspore/rewrite/parsers/if_parser.py +28 -24
  572. mindspore/rewrite/parsers/module_parser.py +202 -25
  573. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  574. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  575. mindspore/rewrite/parsers/return_parser.py +6 -6
  576. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  577. mindspore/rewrite/sparsify/sparsify.py +4 -1
  578. mindspore/rewrite/sparsify/utils.py +11 -5
  579. mindspore/rewrite/symbol_tree.py +577 -732
  580. mindspore/rewrite/symbol_tree_builder.py +9 -175
  581. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  582. mindspore/run_check/_check_version.py +46 -39
  583. mindspore/run_check/run_check.py +3 -2
  584. mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
  585. mindspore/safeguard/rewrite_obfuscation.py +517 -0
  586. mindspore/scipy/__init__.py +1 -1
  587. mindspore/scipy/linalg.py +67 -61
  588. mindspore/scipy/ops.py +5 -41
  589. mindspore/scipy/ops_grad.py +3 -2
  590. mindspore/scipy/ops_wrapper.py +5 -5
  591. mindspore/scipy/optimize/line_search.py +8 -8
  592. mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
  593. mindspore/scipy/optimize/minimize.py +16 -12
  594. mindspore/scipy/utils.py +1 -52
  595. mindspore/scipy/utils_const.py +4 -4
  596. mindspore/train/__init__.py +4 -4
  597. mindspore/train/_utils.py +13 -5
  598. mindspore/train/amp.py +410 -148
  599. mindspore/train/anf_ir_pb2.py +16 -4
  600. mindspore/train/callback/_backup_and_restore.py +8 -11
  601. mindspore/train/callback/_callback.py +80 -3
  602. mindspore/train/callback/_checkpoint.py +82 -51
  603. mindspore/train/callback/_early_stop.py +12 -15
  604. mindspore/train/callback/_history.py +1 -1
  605. mindspore/train/callback/_lambda_callback.py +13 -13
  606. mindspore/train/callback/_landscape.py +21 -17
  607. mindspore/train/callback/_loss_monitor.py +9 -10
  608. mindspore/train/callback/_on_request_exit.py +16 -33
  609. mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
  610. mindspore/train/callback/_summary_collector.py +44 -30
  611. mindspore/train/callback/_time_monitor.py +62 -12
  612. mindspore/train/data_sink.py +10 -16
  613. mindspore/train/dataset_helper.py +154 -86
  614. mindspore/train/loss_scale_manager.py +14 -9
  615. mindspore/train/metrics/__init__.py +10 -2
  616. mindspore/train/metrics/accuracy.py +1 -1
  617. mindspore/train/metrics/auc.py +1 -1
  618. mindspore/train/metrics/bleu_score.py +2 -2
  619. mindspore/train/metrics/confusion_matrix.py +14 -14
  620. mindspore/train/metrics/cosine_similarity.py +3 -3
  621. mindspore/train/metrics/dice.py +1 -1
  622. mindspore/train/metrics/fbeta.py +1 -1
  623. mindspore/train/metrics/hausdorff_distance.py +8 -6
  624. mindspore/train/metrics/mean_surface_distance.py +5 -4
  625. mindspore/train/metrics/metric.py +49 -17
  626. mindspore/train/metrics/occlusion_sensitivity.py +4 -4
  627. mindspore/train/metrics/perplexity.py +1 -1
  628. mindspore/train/metrics/precision.py +2 -2
  629. mindspore/train/metrics/recall.py +2 -3
  630. mindspore/train/metrics/roc.py +7 -7
  631. mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
  632. mindspore/train/metrics/topk.py +7 -4
  633. mindspore/train/mind_ir_pb2.py +193 -48
  634. mindspore/train/model.py +377 -133
  635. mindspore/train/serialization.py +697 -245
  636. mindspore/train/summary/_summary_adapter.py +5 -2
  637. mindspore/train/summary/_writer_pool.py +4 -3
  638. mindspore/train/summary/summary_record.py +25 -23
  639. mindspore/train/train_thor/convert_utils.py +39 -23
  640. mindspore/train/train_thor/dataset_helper.py +4 -3
  641. mindspore/train/train_thor/model_thor.py +8 -8
  642. mindspore/version.py +1 -1
  643. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
  644. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +647 -818
  645. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
  646. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  647. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  648. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  649. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  650. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  651. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  652. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  653. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  654. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  655. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  656. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  657. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  658. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  659. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  660. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  661. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  662. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  663. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  664. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  665. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  666. mindspore/_extends/graph_kernel/expander.py +0 -80
  667. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
  668. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  669. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  670. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  671. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  672. mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
  673. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  674. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  675. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  676. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  677. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  678. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  679. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  680. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  681. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  682. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  683. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  684. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  685. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  686. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  687. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  688. mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
  689. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  690. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  691. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  692. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  693. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  694. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  695. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  696. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  697. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  698. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  699. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  700. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  701. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  702. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  703. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  704. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  705. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  706. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  707. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  708. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  709. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  710. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  711. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  712. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  713. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  714. mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
  715. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  716. mindspore/_extends/parse/jit_fallback_modules.py +0 -51
  717. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  718. mindspore/dataset/engine/graphdata.py +0 -1586
  719. mindspore/include/api/net.h +0 -142
  720. mindspore/ops/_grad/grad_array_ops.py +0 -1347
  721. mindspore/ops/_grad/grad_clip_ops.py +0 -84
  722. mindspore/ops/_grad/grad_debug_ops.py +0 -68
  723. mindspore/ops/_grad/grad_inner_ops.py +0 -235
  724. mindspore/ops/_grad/grad_math_ops.py +0 -1684
  725. mindspore/ops/_grad/grad_nn_ops.py +0 -1529
  726. mindspore/ops/_grad/grad_other_ops.py +0 -89
  727. mindspore/ops/_grad/grad_sequence_ops.py +0 -296
  728. mindspore/ops/_grad/grad_sparse.py +0 -323
  729. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
  730. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
  731. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  732. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  733. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  734. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
  735. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
  736. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
  737. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
  738. mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
  739. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
  740. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
  741. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  742. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
  743. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  744. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
  745. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  746. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
  747. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
  748. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
  749. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  750. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  751. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
  752. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
  753. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
  754. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
  755. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
  756. mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
  757. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
  758. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
  759. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
  760. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  761. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
  762. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  763. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  764. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
  765. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
  766. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
  767. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  768. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  769. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  770. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
  771. mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
  772. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  773. mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
  774. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
  775. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
  776. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
  777. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
  778. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
  779. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
  780. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  781. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
  782. mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
  783. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
  784. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
  785. mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
  786. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  787. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
  788. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
  789. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
  790. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
  791. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
  792. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
  793. mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
  794. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  795. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
  796. mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
  797. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
  798. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
  799. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
  800. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
  801. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
  802. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
  803. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
  804. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
  805. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
  806. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
  807. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  808. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  809. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  810. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
  811. mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
  812. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  813. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  814. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
  815. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
  816. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
  817. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
  818. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  819. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  820. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  821. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
  822. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
  823. mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
  824. mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
  825. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
  826. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  827. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
  828. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
  829. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
  830. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
  831. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
  832. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
  833. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
  834. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
  835. mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
  836. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  837. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  838. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
  839. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
  840. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
  841. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  842. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
  843. mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
  844. mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
  845. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
  846. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  847. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
  848. mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
  849. mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
  850. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
  851. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  852. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
  853. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
  854. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  855. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
  856. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
  857. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  858. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  859. mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
  860. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
  861. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
  862. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
  863. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
  864. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  865. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
  866. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
  867. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
  868. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
  869. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  870. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  871. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
  872. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
  873. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
  874. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
  875. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
  876. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
  877. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
  878. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
  879. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  880. mindspore/rewrite/node_visitor.py +0 -44
  881. mindspore/rewrite/topological_manager.py +0 -203
  882. mindspore/scipy/sparse/linalg.py +0 -192
  883. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
  884. {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
mindspore/nn/optim/ada_grad.py
@@ -112,8 +112,8 @@ class Adagrad(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- accum (float): The starting value for :math:`h`, must be zero or positive values. Default: 0.1.
- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 0.001.
+ accum (float): The starting value for :math:`h`, must be zero or positive values. Default: ``0.1`` .
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``0.001`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -127,13 +127,13 @@ class Adagrad(Optimizer):
  - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
  LearningRateSchedule with step as the input to get the learning rate of current step.

- update_slots (bool): Whether the :math:`h` will be updated. Default: True.
+ update_slots (bool): Whether the :math:`h` will be updated. Default: ``True`` .
  loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value.
  Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
  `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
  `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
- Default: 1.0.
- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ Default: ``1.0`` .
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -147,7 +147,7 @@ class Adagrad(Optimizer):
  in optimizer.

  Outputs:
- Tensor[bool], the value is True.
+ Tensor[bool], the value is ``True`` .

  Raises:
  TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
@@ -162,10 +162,12 @@ class Adagrad(Optimizer):
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
- >>> import mindspore as ms
- >>> from mindspore import nn
+ >>> import mindspore
+ >>> import mindspore.nn as nn
  >>>
- >>> net = Net()
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Adagrad(params=net.trainable_params())
  >>>
@@ -203,6 +205,7 @@ class Adagrad(Optimizer):
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
  success = self.map_reverse(F.partial(_ada_grad_opt, self.opt), lr, params, accum,
  grads)
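
The last hunk above adds one line to Adagrad.construct(): the optimizer now advances its own `global_step` counter with `assignadd` on every update (the Adadelta diff below gains the same line), and that counter is what a `LearningRateSchedule` is evaluated at. Below is a minimal sketch of the dynamic-learning-rate pattern this touches, using only the public `mindspore.nn` API; `TinyNet` is a hypothetical stand-in for the LeNet5 network the updated docstring example references:

    import mindspore.nn as nn

    # Hypothetical stand-in for the LeNet5 network referenced by the 2.2 docstring example.
    class TinyNet(nn.Cell):
        def __init__(self):
            super(TinyNet, self).__init__()
            self.fc = nn.Dense(3, 2)

        def construct(self, x):
            return self.fc(x)

    net = TinyNet()
    # A LearningRateSchedule instance is queried at the optimizer's global_step;
    # the assignadd(self.global_step, ...) line added in the hunk above increments
    # that counter once per construct() call.
    lr_schedule = nn.ExponentialDecayLR(learning_rate=0.1, decay_rate=0.9, decay_steps=100)
    optim = nn.Adagrad(params=net.trainable_params(), learning_rate=lr_schedule, accum=0.1)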
@@ -84,8 +84,8 @@ class Adadelta(Optimizer):
  to get the weight decay value of current step.

  - grad_centralization: Optional. Must be Boolean. If "grad_centralization" is in the keys, the set value
- will be used. If not, the `grad_centralization` is False by default. This configuration only works on the
- convolution layer.
+ will be used. If not, the `grad_centralization` is ``False`` by default. This configuration only works
+ on the convolution layer.

  - order_params: Optional. When parameters are grouped, this is usually used to maintain the order of
  parameters that appeared in the network to improve performance. The value should be parameters whose
@@ -93,7 +93,7 @@ class Adadelta(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 1.0.
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1.0`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -107,14 +107,14 @@ class Adadelta(Optimizer):
  - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
  LearningRateSchedule with step as the input to get the learning rate of current step.

- rho (float): Decay rate, must be in range [0.0, 1.0]. Default: 0.9.
- epsilon (float): A small value added for numerical stability, must be non-negative. Default: 1e-6.
+ rho (float): Decay rate, must be in range [0.0, 1.0]. Default: ``0.9`` .
+ epsilon (float): A small value added for numerical stability, must be non-negative. Default: ``1e-6`` .
  loss_scale (float): Value for the loss scale. It must be greater than 0.0. In general, use the default value.
  Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
- `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+ `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
  `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
- Default: 1.0.
- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ Default: ``1.0`` .
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -128,7 +128,7 @@ class Adadelta(Optimizer):
  the `params` in optimizer. With float16 or float32 data type.

  Outputs:
- Tensor[bool], the value is True.
+ Tensor[bool], the value is ``True`` .

  Raises:
  TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
@@ -143,7 +143,9 @@ class Adadelta(Optimizer):
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
- >>> from mindspore import nn, Model
+ >>> import mindspore as ms
+ >>> import mindspore.nn as nn
+ >>>
  >>> class Net(nn.Cell):
  ... def __init__(self):
  ... super(Net, self).__init__()
@@ -171,7 +173,7 @@ class Adadelta(Optimizer):
  >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
  >>>
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
- >>> model = Model(net, loss_fn=loss, optimizer=optim)
+ >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
  """

  @opt_init_args_register
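Because `learning_rate` also accepts a `LearningRateSchedule`, the per-step rate can be made dynamic. A short sketch, assuming a stand-in `nn.Dense` network and illustrative schedule values:

    import mindspore.nn as nn

    net = nn.Dense(3, 2)
    # decay the rate from 1.0 by a factor of 0.9 every 4 steps
    lr = nn.ExponentialDecayLR(learning_rate=1.0, decay_rate=0.9, decay_steps=4)
    optim = nn.Adadelta(net.trainable_params(), learning_rate=lr, rho=0.9, epsilon=1e-6)
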
@@ -192,6 +194,7 @@ class Adadelta(Optimizer):
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
  success = self.map_reverse(F.partial(_adadelta_opt, self.opt, self.rho, self.epsilon), lr, params,
  self.accum, self.accum_update, grads)
@@ -216,27 +216,29 @@ class AdaFactor(Optimizer):

  learning_rate (Union[float, Tensor]): A value or a graph for the learning rate.
  When the learning_rate is a Tensor in a 1D dimension.
- If the type of `learning_rate` is int, it will be converted to float. Default: None.
+ If the type of `learning_rate` is int, it will be converted to float. Default: ``None`` .
  eps (tuple): The regularization constants for square gradient and parameter scale respectively.
- default: (1e-30, 1e-3)
- clip_threshold (Union[float, Tensor]): The threshold of root mean square of final gradient update. default: 1.0
+ default: ``(1e-30, 1e-3)`` .
+ clip_threshold (Union[float, Tensor]): The threshold of root mean square of final gradient update.
+ default: ``1.0``.
  decay_rate (Union[float, Tensor]): The coefficient used to compute running averages of square gradient.
- default: 0.8
+ default: ``0.8`` .
  beta1 (float): The coefficient for computing running averages of gradient. Should be in range (0.0, 1.0).
- Default: None.
- weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0.
- scale_parameter (bool): If True, learning rate is scaled by root mean square of parameter. default: True
+ Default: ``None`` .
+ weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: ``0.0`` .
+ scale_parameter (bool): If True, learning rate is scaled by root mean square of parameter.
+ default: ``True`` .
  relative_step (bool): If True, time-dependent learning rate is computed instead of external learning rate.
- default: True
+ default: ``True`` .
  warmup_init (bool): The time-dependent learning rate computation depends on whether warm-up
- initialization is being used. default: False
+ initialization is being used. default: ``False`` .
  compression (bool): If True, the data type of the running averages exponent will be compressed to float16.
- default: False
+ default: ``False`` .
  loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the
  default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
- `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+ `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
  `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
- Default: 1.0.
+ Default: ``1.0`` .

  Inputs:
  - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
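A short sketch of the two learning-rate modes those parameters describe, assuming a stand-in `nn.Dense` network (pairing `relative_step=False` with `scale_parameter=False` here is a conventional choice, not a requirement stated above):

    import mindspore.nn as nn

    net = nn.Dense(3, 2)
    # time-dependent internal rate: relative_step=True (the default) with no external rate
    optim_auto = nn.AdaFactor(net.trainable_params(), learning_rate=None)
    # fixed external rate: turn the relative-step schedule off
    optim_fixed = nn.AdaFactor(net.trainable_params(), learning_rate=1e-3,
                               relative_step=False, scale_parameter=False)
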
@@ -261,7 +263,9 @@ class AdaFactor(Optimizer):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>>
- >>> net = Net()
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
  >>> #1) Parameters use the default learning rate with None and weight decay with 0.
  >>> optim = nn.AdaFactor(params=net.trainable_params())
  >>>
@@ -329,7 +333,6 @@ class AdaFactor(Optimizer):
  self.weight_decay = trans_to_tensor(weight_decay)
  self.weight_decay_flag = bool(weight_decay)

- self.step = Parameter(Tensor(0, dtype=mstype.float32), name="train_step")
  self.scale_parameter = scale_parameter
  self.relative_step = relative_step
  self.warmup_init = warmup_init
@@ -407,8 +410,8 @@ class AdaFactor(Optimizer):
  def construct(self, gradients):
  gradients = self.flatten_gradients(gradients)
  lr = self.get_lr()
- F.assign_add(self.step, 1)
- step = self.step
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
+ step = F.assign_add(self.step, 1)
  if self.scale_lr and self.relative_step:
  if self.warmup_init:
  min_step = 1e-6 * step
@@ -499,7 +499,8 @@ def _run_opt_with_one_number_use_amsgrad(opt, sparse_opt, push, pull,
  (op_shape(param), op_shape(moment1), op_shape(moment2),
  op_shape(vhat))), param))
  else:
- success = F.depend(success, opt(param, moment1, moment2, vhat, beta1_power, beta2_power, lr, gradient))
+ success = F.depend(success, opt(param, moment1, moment2, vhat, beta1_power, beta2_power,
+ lr, beta1, beta2, eps, gradient))
  return success

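The widened call above matches the op swap made further down in this diff, where `P.ApplyAdamWithAmsgrad(beta1, beta2, eps, use_locking)` becomes `P.ApplyAdamWithAmsgradV2(use_locking)` and the three coefficients travel with each call instead. From user code the switch is invisible; a minimal sketch, assuming a stand-in `nn.Dense` network:

    import mindspore.nn as nn

    net = nn.Dense(3, 2)
    # beta1/beta2/eps are stored on the optimizer and forwarded to the op per step
    optim = nn.Adam(net.trainable_params(), use_amsgrad=True,
                    beta1=0.9, beta2=0.999, eps=1e-8)
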
@@ -634,7 +635,7 @@ class Adam(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 1e-3.
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1e-3`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -649,22 +650,22 @@ class Adam(Optimizer):
  LearningRateSchedule with step as the input to get the learning rate of current step.

  beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
- Default: 0.9.
+ Default: ``0.9`` .
  beta2 (float): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
- Default: 0.999.
- eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default:
- 1e-8.
+ Default: ``0.999`` .
+ eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
+ Default: ``1e-8`` .
  use_locking (bool): Whether to enable a lock to protect the updating process of variable tensors.
- If true, updates of the `w`, `m`, and `v` tensors will be protected by a lock.
- If false, the result is unpredictable. Default: False.
+ If ``true`` , updates of the `w`, `m`, and `v` tensors will be protected by a lock.
+ If ``false`` , the result is unpredictable. Default: ``False`` .
  use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
- If true, update the gradients using NAG.
- If false, update the gradients without using NAG. Default: False.
+ If ``true`` , update the gradients using NAG.
+ If ``false`` , update the gradients without using NAG. Default: ``False`` .
  use_amsgrad (bool): Whether to use Amsgrad algorithm to update the gradients.
- If true, update the gradients using Amsgrad.
- If false, update the gradients without using Amsgrad. Default: False.
+ If ``true`` , update the gradients using Amsgrad.
+ If ``false`` , update the gradients without using Amsgrad. Default: ``False`` .

- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -681,14 +682,14 @@ class Adam(Optimizer):

  kwargs:

- - use_lazy (bool): Whether to use Lazy Adam algorithm. Default: False.
- If true, apply lazy adam algorithm.
- If false, apply normal adam algorithm.
+ - use_lazy (bool): Whether to use Lazy Adam algorithm. Default: ``False`` .
+ If ``true`` , apply lazy adam algorithm.
+ If ``false`` , apply normal adam algorithm.

  - use_offload (bool): Whether to offload adam optimizer to host CPU and keep parameters being updated on
- the device in order to minimize the memory cost. Default: False.
- If true, apply offload adam.
- If false, apply normal adam.
+ the device in order to minimize the memory cost. Default: ``False`` .
+ If ``true`` , apply offload adam.
+ If ``false`` , apply normal adam.

  Inputs:
  - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
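A short sketch of the two kwargs documented above, assuming a stand-in `nn.Dense` network; the Raises section below records that the two flags are mutually exclusive:

    import mindspore.nn as nn

    net = nn.Dense(3, 2)
    optim_lazy = nn.Adam(net.trainable_params(), use_lazy=True)        # lazy adam
    optim_offload = nn.Adam(net.trainable_params(), use_offload=True)  # states updated on host CPU
    # nn.Adam(..., use_lazy=True, use_offload=True) raises ValueError
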
@@ -697,6 +698,7 @@ class Adam(Optimizer):
  Tensor[bool], the value is True.

  Raises:
+ KeyError: If `kwargs` contains keys other than 'use_lazy' and 'use_offload'.
  TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
  TypeError: If element of `parameters` is neither Parameter nor dict.
  TypeError: If `beta1`, `beta2`, `eps` or `loss_scale` is not a float.
@@ -705,8 +707,8 @@ class Adam(Optimizer):
  ValueError: If `loss_scale` or `eps` is less than or equal to 0.
  ValueError: If `beta1`, `beta2` is not in range (0.0, 1.0).
  ValueError: If `weight_decay` is less than 0.
- ValueError: If `use_lazy` and `use_offload` are both true.
- ValueError: If `use_amsgrad` is true and (`use_lazy` or `use_offload` is true).
+ ValueError: If `use_lazy` and `use_offload` are both ``true`` .
+ ValueError: If `use_amsgrad` is ``true`` and (`use_lazy` or `use_offload` is ``true`` ).
  ValueError: If `use_amsgrad` is true while using distributed training.

  Supported Platforms:
@@ -716,7 +718,9 @@ class Adam(Optimizer):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>>
- >>> net = Net()
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Adam(params=net.trainable_params())
  >>>
@@ -741,6 +745,10 @@ class Adam(Optimizer):
  def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False,
  use_nesterov=False, weight_decay=0.0, loss_scale=1.0, use_amsgrad=False, **kwargs):
  super(Adam, self).__init__(learning_rate, params, weight_decay, loss_scale)
+ valid_keys = {'use_lazy', 'use_offload'}
+ if set(kwargs.keys()) - valid_keys:
+ raise KeyError(f"For 'Adam', invalid keys are passed as kwargs, supported keys are 'use_lazy' and "
+ f"'use_offload', but got {kwargs.keys()}.")
  use_lazy = kwargs.get('use_lazy', False)
  use_offload = kwargs.get('use_offload', False)
  _check_param_value(beta1, beta2, eps, self.cls_name)
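The new guard rejects unknown keyword arguments up front; a minimal sketch of tripping it, assuming a stand-in `nn.Dense` network and a deliberately misspelled key:

    import mindspore.nn as nn

    net = nn.Dense(3, 2)
    try:
        nn.Adam(net.trainable_params(), use_lazzy=True)  # typo: not in {'use_lazy', 'use_offload'}
    except KeyError as e:
        print(e)  # the message names the two supported keys
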
@@ -790,7 +798,7 @@ class Adam(Optimizer):
  else:
  self._is_device = True
  if use_amsgrad:
- self.opt = P.ApplyAdamWithAmsgrad(beta1, beta2, eps, use_locking)
+ self.opt = P.ApplyAdamWithAmsgradV2(use_locking)
  else:
  self.opt = P.Adam(use_locking, use_nesterov)
  self.sparse_opt = P.FusedSparseAdam(use_locking, use_nesterov)
@@ -910,6 +918,7 @@ class Adam(Optimizer):
  gradients = self.scale_grad(gradients)
  gradients = self._grad_sparse_indices_deduplicate(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  beta1_power = self.beta1_power * self.beta1
  self.beta1_power = beta1_power
@@ -977,7 +986,7 @@ class AdamWeightDecay(Optimizer):
  There is usually no connection between an optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, the optimizer needs to set the 'loss_scale'.
  As this optimizer has no argument of `loss_scale`, the `loss_scale` needs to be processed by other means; refer to the
- document `LossScale <https://www.mindspore.cn/tutorials/en/r2.0/advanced/mixed_precision.html>`_ to
+ document `LossScale <https://www.mindspore.cn/tutorials/en/r2.2/advanced/mixed_precision.html>`_ to
  process `loss_scale` correctly.

  If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
@@ -1008,7 +1017,7 @@ class AdamWeightDecay(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 1e-3.
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1e-3`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -1022,14 +1031,14 @@ class AdamWeightDecay(Optimizer):
  - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
  LearningRateSchedule with step as the input to get the learning rate of current step.

- beta1 (float): The exponential decay rate for the 1st moment estimations. Default: 0.9.
+ beta1 (float): The exponential decay rate for the 1st moment estimations. Default: ``0.9`` .
  Should be in range (0.0, 1.0).
- beta2 (float): The exponential decay rate for the 2nd moment estimations. Default: 0.999.
+ beta2 (float): The exponential decay rate for the 2nd moment estimations. Default: ``0.999`` .
  Should be in range (0.0, 1.0).
- eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6.
+ eps (float): Term added to the denominator to improve numerical stability. Default: ``1e-6`` .
  Should be greater than 0.

- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -1060,7 +1069,9 @@ class AdamWeightDecay(Optimizer):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>>
- >>> net = Net()
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdamWeightDecay(params=net.trainable_params())
  >>>
@@ -1089,16 +1100,17 @@ class AdamWeightDecay(Optimizer):
  self.moments1 = self._parameters.clone(prefix="adam_m", init='zeros')
  self.moments2 = self._parameters.clone(prefix="adam_v", init='zeros')
  self.fused_opt = P.AdamWeightDecay()
- if context.get_context("device_target") == "CPU":
- self.use_fused_opt = True
- else:
+ if context.get_context("device_target") == "Ascend":
  self.use_fused_opt = False
+ else:
+ self.use_fused_opt = True

  @jit
  def construct(self, gradients):
  gradients = self.flatten_gradients(gradients)
  weight_decay = self.get_weight_decay()
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  if self.use_fused_opt:
  if self.is_group:
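The rewritten branch flips the default: previously only CPU used the fused `P.AdamWeightDecay` kernel, and now every backend except Ascend does. A minimal sketch, assuming a stand-in `nn.Dense` network:

    import mindspore as ms
    import mindspore.nn as nn

    ms.set_context(device_target="CPU")  # CPU and GPU now take the fused path
    net = nn.Dense(3, 2)
    optim = nn.AdamWeightDecay(net.trainable_params(), learning_rate=1e-3, weight_decay=0.01)
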
@@ -1206,7 +1218,7 @@ class AdamOffload(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 1e-3.
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1e-3`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -1221,19 +1233,19 @@ class AdamOffload(Optimizer):
  LearningRateSchedule with step as the input to get the learning rate of current step.

  beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
- Default: 0.9.
+ Default: ``0.9`` .
  beta2 (float): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
- Default: 0.999.
- eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default:
- 1e-8.
+ Default: ``0.999`` .
+ eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
+ Default: ``1e-8`` .
  use_locking (bool): Whether to enable a lock to protect the updating process of variable tensors.
- If true, updates of the `w`, `m`, and `v` tensors will be protected by a lock.
- If false, the result is unpredictable. Default: False.
+ If ``true`` , updates of the `w`, `m`, and `v` tensors will be protected by a lock.
+ If ``false`` , the result is unpredictable. Default: ``False`` .
  use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
- If true, update the gradients using NAG.
- If false, update the gradients without using NAG. Default: False.
+ If ``true`` , update the gradients using NAG.
+ If ``false`` , update the gradients without using NAG. Default: ``False`` .

- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -1244,15 +1256,15 @@ class AdamOffload(Optimizer):
  loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the
  default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
- `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+ `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
  `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
- Default: 1.0.
+ Default: ``1.0`` .

  Inputs:
  - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.

  Outputs:
- Tensor[bool], the value is True.
+ Tensor[bool], the value is ``True`` .

  Raises:
  TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
@@ -1271,7 +1283,9 @@ class AdamOffload(Optimizer):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>>
- >>> net = Net()
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdamOffload(params=net.trainable_params())
  >>>
@@ -1318,6 +1332,7 @@ class AdamOffload(Optimizer):
  gradients = self.decay_weight(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  beta1_power = self.beta1_power * self.beta1
  self.beta1_power = beta1_power
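The two lines after the step update maintain a running power of `beta1` for bias correction (the `beta2` power is handled the same way); a plain-Python sketch of the recurrence:

    beta1 = 0.9
    beta1_power = 1.0
    for step in range(1, 4):
        beta1_power *= beta1          # equals beta1 ** step
        print(step, beta1_power)      # 0.9, 0.81, 0.729
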
@@ -66,7 +66,7 @@ class AdaMax(Optimizer):
  :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector,
  :math:`g` represents `gradients`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
  :math:`t` represents the current step, :math:`\beta_1^t` represents `beta1_power`,
- :math:`\l` represents `learning_rate`, :math:`w` represents `params`,
+ :math:`l` represents `learning_rate`, :math:`w` represents `params`,
  :math:`\epsilon` represents `eps`.

  Note:
@@ -102,7 +102,7 @@ class AdaMax(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: 0.001.
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``0.001`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -117,13 +117,13 @@ class AdaMax(Optimizer):
  LearningRateSchedule with step as the input to get the learning rate of current step.

  beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0).
- Default: 0.9.
+ Default: ``0.9`` .
  beta2 (float): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0).
- Default: 0.999.
- eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default:
- 1e-8.
+ Default: ``0.999`` .
+ eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
+ Default: ``1e-08`` .

- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -134,9 +134,9 @@ class AdaMax(Optimizer):
  loss_scale (float): A floating point value for the loss scale. Should be greater than 0. In general, use the
  default value. Only when `FixedLossScaleManager` is used for training and the `drop_overflow_update` in
- `FixedLossScaleManager` is set to False, then this value needs to be the same as the `loss_scale` in
+ `FixedLossScaleManager` is set to ``False`` , then this value needs to be the same as the `loss_scale` in
  `FixedLossScaleManager`. Refer to class :class:`mindspore.amp.FixedLossScaleManager` for more details.
- Default: 1.0.
+ Default: ``1.0`` .

  Inputs:
  - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.

@@ -160,7 +160,9 @@ class AdaMax(Optimizer):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>>
- >>> net = Net()
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdaMax(params=net.trainable_params())
  >>>
@@ -202,6 +204,7 @@ class AdaMax(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  self.beta1_power *= self.beta1

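For reference, a NumPy sketch of the textbook AdaMax recurrence behind the symbols fixed in the docstring above (standard Kingma-Ba form; the `eps` placement follows the parameter doc and is an assumption about the exact kernel):

    import numpy as np

    def adamax_step(w, m, v, g, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8, t=1):
        m = beta1 * m + (1 - beta1) * g        # 1st moment vector
        v = np.maximum(beta2 * v, np.abs(g))   # infinity-norm 2nd moment
        w = w - lr / (1 - beta1 ** t) * m / (v + eps)
        return w, m, v
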
@@ -442,12 +442,14 @@ class AdaSumByGradWrapCell(Cell):
  ``Ascend`` ``GPU``

  Examples:
+ >>> import mindspore as ms
  >>> from mindspore import nn
- >>> from mindspore.nn import AdaSumByGradWrapCell
- >>> net = Net()
- >>> optim = AdaSumByGradWrapCell(nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9))
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
+ >>> optim = nn.AdaSumByGradWrapCell(nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9))
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
- >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
+ >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim, metrics=None)
  """
  def __init__(self, optimizer):
  super(AdaSumByGradWrapCell, self).__init__(auto_prefix=False)
@@ -509,13 +511,15 @@ class AdaSumByDeltaWeightWrapCell(Cell):
  ``Ascend`` ``GPU``

  Examples:
+ >>> import mindspore as ms
  >>> from mindspore import nn
- >>> from mindspore.nn import AdaSumByDeltaWeightWrapCell
- >>> net = Net()
- >>> optim = AdaSumByDeltaWeightWrapCell(nn.Momentum(params=net.trainable_params(),
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
+ >>> optim = nn.AdaSumByDeltaWeightWrapCell(nn.Momentum(params=net.trainable_params(),
  ... learning_rate=0.1, momentum=0.9))
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
- >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
+ >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim, metrics=None)
  """
  def __init__(self, optimizer):
  super(AdaSumByDeltaWeightWrapCell, self).__init__(auto_prefix=False)
@@ -81,7 +81,7 @@ class ASGD(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): learning_rate. Default: 0.1.
+ learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): learning_rate. Default: ``0.1`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -95,10 +95,10 @@ class ASGD(Optimizer):
  - LearningRateSchedule: Learning rate is dynamic. During training, the optimizer calls the instance of
  LearningRateSchedule with step as the input to get the learning rate of current step.

- lambd (float): The decay term. Default: 1e-4.
- alpha (float): The power for :math:`\eta` update. Default: 0.75.
- t0 (float): The point of starting averaging. Default: 1e6.
- weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: 0.0.
+ lambd (float): The decay term. Default: ``1e-4`` .
+ alpha (float): The power for :math:`\eta` update. Default: ``0.75`` .
+ t0 (float): The point of starting averaging. Default: ``1e6`` .
+ weight_decay (Union[float, int, Cell]): Weight decay (L2 penalty). Default: ``0.0`` .

  - float: The fixed weight decay value. Must be equal to or greater than 0.

@@ -127,7 +127,9 @@ class ASGD(Optimizer):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>>
- >>> net = Net()
+ >>> # Define the network structure of LeNet5. Refer to
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+ >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.ASGD(params=net.trainable_params())
  >>>
@@ -145,7 +147,7 @@ class ASGD(Optimizer):
  >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
  >>>
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
- >>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
+ >>> model = ms.train.Model(net, loss_fn=loss, optimizer=optim)
  """

  @opt_init_args_register
@@ -183,8 +185,7 @@ class ASGD(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lrs = self.get_lr()
- if not self._is_dynamic_lr_or_weight_decay():
- self.assignadd(self.global_step, self.global_step_increase_tensor)
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  success = True
  params = self._parameters
  for index, (grad, param, mu, eta, ax) in enumerate(zip(gradients, params, self.mu, self.eta, self.ax)):
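With the conditional gone, ASGD now advances `global_step` unconditionally, like the other optimizers in this diff. For orientation, a plain-Python sketch of the classic ASGD step-size recurrence that `lambd`, `alpha` and `t0` parameterize (the textbook form, an assumption about the exact kernel):

    lr, lambd, alpha, t0 = 0.1, 1e-4, 0.75, 1e6
    for t in range(1, 4):
        eta = lr / (1 + lambd * lr * t) ** alpha   # per-step rate
        mu = 1 / max(1, t - t0)                    # averaging weight once t exceeds t0
        print(t, eta, mu)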