mindspore 2.1.0__cp39-cp39-win_amd64.whl → 2.2.10__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (505)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +4 -1
  5. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  8. mindspore/_check_jit_forbidden_api.py +3 -1
  9. mindspore/_checkparam.py +23 -29
  10. mindspore/_extends/graph_kernel/__init__.py +0 -1
  11. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  12. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  13. mindspore/_extends/graph_kernel/splitter.py +4 -11
  14. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
  15. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
  16. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  17. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  18. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
  19. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
  20. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  21. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  22. mindspore/_extends/parse/__init__.py +12 -15
  23. mindspore/_extends/parse/namespace.py +7 -33
  24. mindspore/_extends/parse/parser.py +61 -71
  25. mindspore/_extends/parse/resources.py +1 -1
  26. mindspore/_extends/parse/standard_method.py +74 -104
  27. mindspore/_extends/parse/trope.py +1 -1
  28. mindspore/_extends/remote/kernel_build_server.py +25 -7
  29. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  30. mindspore/_install_custom.py +43 -0
  31. mindspore/amp.py +47 -11
  32. mindspore/atlprov.dll +0 -0
  33. mindspore/boost/boost.py +1 -8
  34. mindspore/boost/boost_cell_wrapper.py +3 -2
  35. mindspore/boost/grad_accumulation.py +1 -1
  36. mindspore/boost/group_loss_scale_manager.py +8 -7
  37. mindspore/c1.dll +0 -0
  38. mindspore/c1xx.dll +0 -0
  39. mindspore/c2.dll +0 -0
  40. mindspore/common/__init__.py +5 -3
  41. mindspore/common/_jit_fallback_utils.py +6 -0
  42. mindspore/common/_register_for_adapter.py +2 -0
  43. mindspore/common/_register_for_tensor.py +2 -2
  44. mindspore/common/_stub_tensor.py +13 -0
  45. mindspore/common/_utils.py +13 -0
  46. mindspore/common/api.py +174 -259
  47. mindspore/common/auto_dynamic_shape.py +494 -0
  48. mindspore/common/dtype.py +18 -11
  49. mindspore/common/dump.py +6 -4
  50. mindspore/common/initializer.py +14 -14
  51. mindspore/common/jit_config.py +33 -15
  52. mindspore/common/lazy_inline.py +126 -7
  53. mindspore/common/mindir_util.py +101 -0
  54. mindspore/common/parameter.py +51 -41
  55. mindspore/common/seed.py +4 -4
  56. mindspore/common/sparse_tensor.py +13 -14
  57. mindspore/common/tensor.py +243 -165
  58. mindspore/communication/__init__.py +7 -4
  59. mindspore/communication/_comm_helper.py +83 -4
  60. mindspore/communication/management.py +152 -84
  61. mindspore/config/op_info.config +14 -3
  62. mindspore/context.py +152 -61
  63. mindspore/dataset/__init__.py +5 -5
  64. mindspore/dataset/audio/__init__.py +2 -2
  65. mindspore/dataset/audio/transforms.py +52 -52
  66. mindspore/dataset/callback/ds_callback.py +16 -2
  67. mindspore/dataset/core/config.py +68 -51
  68. mindspore/dataset/engine/cache_client.py +28 -5
  69. mindspore/dataset/engine/datasets.py +250 -112
  70. mindspore/dataset/engine/datasets_audio.py +43 -211
  71. mindspore/dataset/engine/datasets_standard_format.py +16 -35
  72. mindspore/dataset/engine/datasets_text.py +43 -67
  73. mindspore/dataset/engine/datasets_user_defined.py +86 -100
  74. mindspore/dataset/engine/datasets_vision.py +219 -1029
  75. mindspore/dataset/engine/iterators.py +11 -4
  76. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
  77. mindspore/dataset/engine/obs/util.py +3 -0
  78. mindspore/dataset/engine/samplers.py +1 -1
  79. mindspore/dataset/engine/validators.py +19 -5
  80. mindspore/dataset/text/__init__.py +3 -3
  81. mindspore/dataset/text/transforms.py +101 -127
  82. mindspore/dataset/text/utils.py +205 -138
  83. mindspore/dataset/transforms/__init__.py +1 -1
  84. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  85. mindspore/dataset/transforms/transforms.py +95 -40
  86. mindspore/dataset/utils/browse_dataset.py +8 -2
  87. mindspore/dataset/utils/line_reader.py +17 -19
  88. mindspore/dataset/vision/__init__.py +3 -3
  89. mindspore/dataset/vision/c_transforms.py +6 -3
  90. mindspore/dataset/vision/transforms.py +409 -287
  91. mindspore/dataset/vision/utils.py +13 -14
  92. mindspore/dataset/vision/validators.py +11 -1
  93. mindspore/dnnl.dll +0 -0
  94. mindspore/dpcmi.dll +0 -0
  95. mindspore/experimental/map_parameter.py +14 -0
  96. mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
  97. mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
  98. mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
  99. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  100. mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
  101. mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
  102. mindspore/gen_ops.py +273 -0
  103. mindspore/include/OWNERS +0 -1
  104. mindspore/include/api/data_type.h +2 -1
  105. mindspore/include/api/graph.h +0 -15
  106. mindspore/include/api/kernel.h +2 -0
  107. mindspore/include/api/kernel_api.h +37 -12
  108. mindspore/include/api/model.h +17 -14
  109. mindspore/include/api/status.h +8 -3
  110. mindspore/include/api/types.h +37 -4
  111. mindspore/include/c_api/ms/abstract.h +67 -0
  112. mindspore/include/c_api/ms/attribute.h +197 -0
  113. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  114. mindspore/include/c_api/ms/base/macros.h +32 -0
  115. mindspore/include/c_api/ms/base/status.h +33 -0
  116. mindspore/include/c_api/ms/base/types.h +282 -0
  117. mindspore/include/c_api/ms/context.h +102 -0
  118. mindspore/include/c_api/ms/graph.h +160 -0
  119. mindspore/include/c_api/ms/node.h +606 -0
  120. mindspore/include/c_api/ms/tensor.h +161 -0
  121. mindspore/include/c_api/ms/value.h +84 -0
  122. mindspore/include/dataset/constants.h +6 -5
  123. mindspore/include/dataset/execute.h +23 -13
  124. mindspore/include/dataset/text.h +26 -26
  125. mindspore/include/dataset/transforms.h +13 -13
  126. mindspore/include/dataset/vision.h +60 -60
  127. mindspore/include/dataset/vision_ascend.h +5 -6
  128. mindspore/include/dataset/vision_lite.h +17 -17
  129. mindspore/jpeg62.dll +0 -0
  130. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  131. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  132. mindspore/mindspore_backend.dll +0 -0
  133. mindspore/mindspore_common.dll +0 -0
  134. mindspore/mindspore_core.dll +0 -0
  135. mindspore/mindspore_glog.dll +0 -0
  136. mindspore/mindspore_shared_lib.dll +0 -0
  137. mindspore/msobj140.dll +0 -0
  138. mindspore/mspdb140.dll +0 -0
  139. mindspore/mspdbcore.dll +0 -0
  140. mindspore/mspdbst.dll +0 -0
  141. mindspore/mspft140.dll +0 -0
  142. mindspore/msvcdis140.dll +0 -0
  143. mindspore/msvcp140_1.dll +0 -0
  144. mindspore/msvcp140_2.dll +0 -0
  145. mindspore/msvcp140_atomic_wait.dll +0 -0
  146. mindspore/msvcp140_codecvt_ids.dll +0 -0
  147. mindspore/nn/__init__.py +0 -2
  148. mindspore/nn/cell.py +313 -74
  149. mindspore/nn/dynamic_lr.py +21 -21
  150. mindspore/nn/layer/activation.py +22 -30
  151. mindspore/nn/layer/basic.py +15 -13
  152. mindspore/nn/layer/channel_shuffle.py +1 -1
  153. mindspore/nn/layer/container.py +271 -9
  154. mindspore/nn/layer/conv.py +323 -204
  155. mindspore/nn/layer/dense.py +8 -5
  156. mindspore/nn/layer/embedding.py +33 -27
  157. mindspore/nn/layer/flash_attention.py +141 -88
  158. mindspore/nn/layer/image.py +8 -6
  159. mindspore/nn/layer/math.py +16 -25
  160. mindspore/nn/layer/normalization.py +107 -66
  161. mindspore/nn/layer/padding.py +1 -1
  162. mindspore/nn/layer/pooling.py +131 -109
  163. mindspore/nn/layer/rnn_cells.py +27 -22
  164. mindspore/nn/layer/rnns.py +13 -16
  165. mindspore/nn/layer/thor_layer.py +1 -1
  166. mindspore/nn/layer/transformer.py +221 -154
  167. mindspore/nn/learning_rate_schedule.py +9 -1
  168. mindspore/nn/loss/loss.py +235 -174
  169. mindspore/nn/optim/ada_grad.py +2 -1
  170. mindspore/nn/optim/adadelta.py +1 -0
  171. mindspore/nn/optim/adafactor.py +2 -1
  172. mindspore/nn/optim/adam.py +7 -4
  173. mindspore/nn/optim/adamax.py +3 -2
  174. mindspore/nn/optim/adasum.py +2 -2
  175. mindspore/nn/optim/asgd.py +2 -3
  176. mindspore/nn/optim/ftrl.py +6 -5
  177. mindspore/nn/optim/lamb.py +7 -4
  178. mindspore/nn/optim/lars.py +1 -1
  179. mindspore/nn/optim/lazyadam.py +5 -3
  180. mindspore/nn/optim/momentum.py +2 -1
  181. mindspore/nn/optim/optimizer.py +53 -4
  182. mindspore/nn/optim/proximal_ada_grad.py +3 -4
  183. mindspore/nn/optim/rmsprop.py +4 -3
  184. mindspore/nn/optim/rprop.py +23 -12
  185. mindspore/nn/optim/sgd.py +26 -11
  186. mindspore/nn/optim/thor.py +9 -7
  187. mindspore/nn/probability/bijector/bijector.py +5 -5
  188. mindspore/nn/probability/bijector/power_transform.py +27 -27
  189. mindspore/nn/probability/bijector/softplus.py +3 -3
  190. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
  191. mindspore/nn/probability/distribution/bernoulli.py +5 -5
  192. mindspore/nn/probability/distribution/beta.py +3 -3
  193. mindspore/nn/probability/distribution/categorical.py +7 -7
  194. mindspore/nn/probability/distribution/cauchy.py +0 -1
  195. mindspore/nn/probability/distribution/distribution.py +3 -3
  196. mindspore/nn/probability/distribution/gamma.py +3 -3
  197. mindspore/nn/probability/distribution/geometric.py +4 -4
  198. mindspore/nn/probability/distribution/gumbel.py +4 -4
  199. mindspore/nn/probability/distribution/log_normal.py +2 -2
  200. mindspore/nn/probability/distribution/logistic.py +2 -2
  201. mindspore/nn/probability/distribution/poisson.py +4 -4
  202. mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
  203. mindspore/nn/probability/distribution/uniform.py +6 -6
  204. mindspore/nn/wrap/cell_wrapper.py +84 -34
  205. mindspore/nn/wrap/grad_reducer.py +8 -5
  206. mindspore/nn/wrap/loss_scale.py +105 -42
  207. mindspore/numpy/array_creations.py +1 -2
  208. mindspore/numpy/array_ops.py +3 -2
  209. mindspore/numpy/utils_const.py +5 -5
  210. mindspore/opencv_core452.dll +0 -0
  211. mindspore/opencv_imgcodecs452.dll +0 -0
  212. mindspore/opencv_imgproc452.dll +0 -0
  213. mindspore/ops/_grad_experimental/__init__.py +0 -5
  214. mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
  215. mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
  216. mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
  217. mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
  218. mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
  219. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
  220. mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
  221. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  222. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  223. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
  224. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
  225. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
  226. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
  227. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
  228. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
  229. mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
  230. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  231. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  232. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  233. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  234. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  235. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  236. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  237. mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
  238. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  239. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  240. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  241. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  242. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  243. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  244. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  245. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
  246. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
  247. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  248. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  249. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  250. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  251. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  252. mindspore/ops/_op_impl/tbe/__init__.py +4 -4
  253. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  254. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  255. mindspore/ops/_primitive_cache.py +1 -1
  256. mindspore/ops/_tracefunc.py +45 -13
  257. mindspore/ops/_utils/utils.py +6 -1
  258. mindspore/ops/_vmap/vmap_array_ops.py +3 -3
  259. mindspore/ops/_vmap/vmap_base.py +3 -3
  260. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  261. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  262. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  263. mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
  264. mindspore/ops/arg_dtype_cast.py +54 -0
  265. mindspore/ops/composite/base.py +37 -10
  266. mindspore/ops/composite/math_ops.py +5 -4
  267. mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
  268. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
  269. mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
  270. mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
  271. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  272. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  273. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  274. mindspore/ops/deprecated.py +304 -0
  275. mindspore/ops/function/__init__.py +4 -1
  276. mindspore/ops/function/array_func.py +174 -193
  277. mindspore/ops/function/clip_func.py +81 -13
  278. mindspore/ops/function/debug_func.py +1 -1
  279. mindspore/ops/function/grad/grad_func.py +18 -9
  280. mindspore/ops/function/image_func.py +10 -4
  281. mindspore/ops/function/linalg_func.py +5 -5
  282. mindspore/ops/function/math_func.py +575 -386
  283. mindspore/ops/function/nn_func.py +568 -260
  284. mindspore/ops/function/random_func.py +88 -57
  285. mindspore/ops/function/sparse_func.py +1 -1
  286. mindspore/ops/function/sparse_unary_func.py +14 -12
  287. mindspore/ops/function/vmap_func.py +6 -5
  288. mindspore/ops/functional.py +15 -10
  289. mindspore/ops/op_info_register.py +244 -25
  290. mindspore/ops/operations/__init__.py +28 -19
  291. mindspore/ops/operations/_grad_ops.py +72 -7
  292. mindspore/ops/operations/_inner_ops.py +350 -17
  293. mindspore/ops/operations/_quant_ops.py +4 -8
  294. mindspore/ops/operations/_sequence_ops.py +42 -0
  295. mindspore/ops/operations/array_ops.py +68 -282
  296. mindspore/ops/operations/comm_ops.py +107 -59
  297. mindspore/ops/operations/custom_ops.py +94 -70
  298. mindspore/ops/operations/debug_ops.py +8 -4
  299. mindspore/ops/operations/image_ops.py +18 -12
  300. mindspore/ops/operations/inner_ops.py +26 -3
  301. mindspore/ops/operations/math_ops.py +189 -141
  302. mindspore/ops/operations/nn_ops.py +794 -489
  303. mindspore/ops/operations/other_ops.py +0 -22
  304. mindspore/ops/operations/random_ops.py +53 -111
  305. mindspore/ops/operations/sparse_ops.py +3 -1
  306. mindspore/ops/primitive.py +24 -18
  307. mindspore/parallel/_auto_parallel_context.py +68 -8
  308. mindspore/parallel/_cost_model_context.py +2 -2
  309. mindspore/parallel/_offload_context.py +17 -3
  310. mindspore/parallel/_parallel_serialization.py +12 -5
  311. mindspore/parallel/_ps_context.py +12 -0
  312. mindspore/parallel/_tensor.py +18 -13
  313. mindspore/parallel/_transformer/layers.py +5 -3
  314. mindspore/parallel/_transformer/loss.py +1 -0
  315. mindspore/parallel/_transformer/moe.py +2 -2
  316. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  317. mindspore/parallel/_transformer/transformer.py +23 -3
  318. mindspore/parallel/_utils.py +11 -7
  319. mindspore/parallel/algo_parameter_config.py +85 -5
  320. mindspore/parallel/checkpoint_transform.py +19 -12
  321. mindspore/parallel/shard.py +21 -14
  322. mindspore/pgodb140.dll +0 -0
  323. mindspore/pgort140.dll +0 -0
  324. mindspore/profiler/common/struct_type.py +3 -3
  325. mindspore/profiler/common/util.py +4 -2
  326. mindspore/profiler/envprofiling.py +1 -1
  327. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  328. mindspore/profiler/parser/ascend_flops_generator.py +2 -2
  329. mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
  330. mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
  331. mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
  332. mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
  333. mindspore/profiler/parser/ascend_op_generator.py +6 -6
  334. mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
  335. mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
  336. mindspore/profiler/parser/base_timeline_generator.py +10 -8
  337. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
  338. mindspore/profiler/parser/flops_parser.py +15 -11
  339. mindspore/profiler/parser/framework_parser.py +38 -22
  340. mindspore/profiler/parser/hccl_parser.py +16 -12
  341. mindspore/profiler/parser/integrator.py +22 -11
  342. mindspore/profiler/parser/memory_usage_parser.py +2 -2
  343. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  344. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  345. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  346. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  347. mindspore/profiler/parser/optime_parser.py +1 -1
  348. mindspore/profiler/parser/profiler_info.py +21 -2
  349. mindspore/profiler/parser/step_trace_parser.py +11 -14
  350. mindspore/profiler/profiling.py +179 -89
  351. mindspore/rewrite/api/node.py +102 -19
  352. mindspore/rewrite/api/node_type.py +5 -1
  353. mindspore/rewrite/api/pattern_engine.py +1 -1
  354. mindspore/rewrite/api/scoped_value.py +9 -17
  355. mindspore/rewrite/api/symbol_tree.py +131 -47
  356. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  357. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  358. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  359. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
  360. mindspore/rewrite/common/rewrite_elog.py +5 -1
  361. mindspore/rewrite/namer.py +33 -24
  362. mindspore/rewrite/namespace.py +14 -5
  363. mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
  364. mindspore/rewrite/node/call_function.py +79 -0
  365. mindspore/rewrite/node/cell_container.py +135 -0
  366. mindspore/rewrite/node/control_flow.py +88 -0
  367. mindspore/rewrite/{node.py → node/node.py} +273 -234
  368. mindspore/rewrite/node/node_manager.py +254 -0
  369. mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
  370. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  371. mindspore/rewrite/parsers/assign_parser.py +216 -221
  372. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  373. mindspore/rewrite/parsers/class_def_parser.py +174 -113
  374. mindspore/rewrite/parsers/constant_parser.py +9 -6
  375. mindspore/rewrite/parsers/container_parser.py +9 -7
  376. mindspore/rewrite/parsers/for_parser.py +36 -15
  377. mindspore/rewrite/parsers/function_def_parser.py +24 -16
  378. mindspore/rewrite/parsers/if_parser.py +28 -24
  379. mindspore/rewrite/parsers/module_parser.py +196 -25
  380. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  381. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  382. mindspore/rewrite/parsers/return_parser.py +6 -6
  383. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  384. mindspore/rewrite/sparsify/utils.py +1 -1
  385. mindspore/rewrite/symbol_tree.py +523 -578
  386. mindspore/rewrite/symbol_tree_builder.py +9 -193
  387. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  388. mindspore/run_check/_check_version.py +6 -4
  389. mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
  390. mindspore/safeguard/rewrite_obfuscation.py +541 -0
  391. mindspore/tbbmalloc.dll +0 -0
  392. mindspore/tinyxml2.dll +0 -0
  393. mindspore/train/_utils.py +7 -3
  394. mindspore/train/amp.py +323 -123
  395. mindspore/train/anf_ir_pb2.py +14 -2
  396. mindspore/train/callback/_backup_and_restore.py +2 -12
  397. mindspore/train/callback/_callback.py +29 -4
  398. mindspore/train/callback/_checkpoint.py +23 -8
  399. mindspore/train/callback/_early_stop.py +2 -2
  400. mindspore/train/callback/_landscape.py +4 -4
  401. mindspore/train/callback/_loss_monitor.py +2 -2
  402. mindspore/train/callback/_on_request_exit.py +2 -2
  403. mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
  404. mindspore/train/callback/_summary_collector.py +15 -8
  405. mindspore/train/callback/_time_monitor.py +58 -5
  406. mindspore/train/data_sink.py +5 -11
  407. mindspore/train/dataset_helper.py +84 -57
  408. mindspore/train/loss_scale_manager.py +2 -2
  409. mindspore/train/metrics/__init__.py +3 -3
  410. mindspore/train/metrics/cosine_similarity.py +1 -1
  411. mindspore/train/metrics/hausdorff_distance.py +3 -2
  412. mindspore/train/metrics/mean_surface_distance.py +3 -2
  413. mindspore/train/metrics/metric.py +39 -19
  414. mindspore/train/metrics/roc.py +2 -2
  415. mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
  416. mindspore/train/mind_ir_pb2.py +85 -36
  417. mindspore/train/model.py +187 -47
  418. mindspore/train/serialization.py +487 -161
  419. mindspore/train/summary/_summary_adapter.py +1 -1
  420. mindspore/train/summary/_writer_pool.py +3 -2
  421. mindspore/train/summary/summary_record.py +37 -17
  422. mindspore/train/train_thor/convert_utils.py +3 -3
  423. mindspore/train/train_thor/dataset_helper.py +1 -1
  424. mindspore/turbojpeg.dll +0 -0
  425. mindspore/vcmeta.dll +0 -0
  426. mindspore/vcruntime140.dll +0 -0
  427. mindspore/vcruntime140_1.dll +0 -0
  428. mindspore/version.py +1 -1
  429. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/METADATA +5 -3
  430. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/RECORD +433 -479
  431. mindspore/_extends/graph_kernel/expander.py +0 -80
  432. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
  433. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  434. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  435. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  436. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  437. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  438. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  439. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  440. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  441. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  442. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  443. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  444. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  445. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  446. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  447. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  448. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  449. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  450. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  451. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  452. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  453. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  454. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  455. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  456. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  457. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  458. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  459. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  460. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  461. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  462. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  463. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  464. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  465. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  466. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  467. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  468. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  469. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  470. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  471. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  472. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  473. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  474. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  475. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  476. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  477. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  478. mindspore/dataset/datapreprocess/__init__.py +0 -20
  479. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  480. mindspore/include/api/net.h +0 -142
  481. mindspore/nn/lr_scheduler.py +0 -262
  482. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
  483. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
  484. mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
  485. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  486. mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
  487. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
  488. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
  489. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
  490. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  491. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  492. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
  493. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
  494. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  495. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  496. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
  497. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
  498. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
  499. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
  500. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
  501. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  502. mindspore/rewrite/node_visitor.py +0 -44
  503. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/WHEEL +0 -0
  504. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/entry_points.txt +0 -0
  505. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/top_level.txt +0 -0
mindspore/nn/optim/ada_grad.py CHANGED
@@ -166,7 +166,7 @@ class Adagrad(Optimizer):
  >>> import mindspore.nn as nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Adagrad(params=net.trainable_params())
@@ -205,6 +205,7 @@ class Adagrad(Optimizer):
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
  success = self.map_reverse(F.partial(_ada_grad_opt, self.opt), lr, params, accum,
  grads)
mindspore/nn/optim/adadelta.py CHANGED
@@ -194,6 +194,7 @@ class Adadelta(Optimizer):
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
  success = self.map_reverse(F.partial(_adadelta_opt, self.opt, self.rho, self.epsilon), lr, params,
  self.accum, self.accum_update, grads)
mindspore/nn/optim/adafactor.py CHANGED
@@ -264,7 +264,7 @@ class AdaFactor(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) Parameters use the default learning rate with None and weight decay with 0.
  >>> optim = nn.AdaFactor(params=net.trainable_params())
@@ -410,6 +410,7 @@ class AdaFactor(Optimizer):
  def construct(self, gradients):
  gradients = self.flatten_gradients(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  step = F.assign_add(self.step, 1)
  if self.scale_lr and self.relative_step:
  if self.warmup_init:
mindspore/nn/optim/adam.py CHANGED
@@ -719,7 +719,7 @@ class Adam(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Adam(params=net.trainable_params())
@@ -918,6 +918,7 @@ class Adam(Optimizer):
  gradients = self.scale_grad(gradients)
  gradients = self._grad_sparse_indices_deduplicate(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  beta1_power = self.beta1_power * self.beta1
  self.beta1_power = beta1_power
@@ -985,7 +986,7 @@ class AdamWeightDecay(Optimizer):
  There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
  As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means, refer
- document `LossScale <https://www.mindspore.cn/tutorials/en/r2.1/advanced/mixed_precision.html>`_ to
+ document `LossScale <https://www.mindspore.cn/tutorials/en/r2.2/advanced/mixed_precision.html>`_ to
  process `loss_scale` correctly.

  If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
@@ -1069,7 +1070,7 @@ class AdamWeightDecay(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdamWeightDecay(params=net.trainable_params())
@@ -1109,6 +1110,7 @@ class AdamWeightDecay(Optimizer):
  gradients = self.flatten_gradients(gradients)
  weight_decay = self.get_weight_decay()
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  if self.use_fused_opt:
  if self.is_group:
@@ -1282,7 +1284,7 @@ class AdamOffload(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdamOffload(params=net.trainable_params())
@@ -1330,6 +1332,7 @@ class AdamOffload(Optimizer):
  gradients = self.decay_weight(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  beta1_power = self.beta1_power * self.beta1
  self.beta1_power = beta1_power
mindspore/nn/optim/adamax.py CHANGED
@@ -66,7 +66,7 @@ class AdaMax(Optimizer):
  :math:`m` represents the 1st moment vector, :math:`v` represents the 2nd moment vector,
  :math:`g` represents `gradients`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
  :math:`t` represents the current step, :math:`beta_1^t` represent `beta1_power`,
- :math:`\l` represents `learning_rate`, :math:`w` represents `params`,
+ :math:`l` represents `learning_rate`, :math:`w` represents `params`,
  :math:`\epsilon` represents `eps`.

  Note:
@@ -161,7 +161,7 @@ class AdaMax(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.AdaMax(params=net.trainable_params())
@@ -204,6 +204,7 @@ class AdaMax(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  self.beta1_power *= self.beta1

mindspore/nn/optim/adasum.py CHANGED
@@ -445,7 +445,7 @@ class AdaSumByGradWrapCell(Cell):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> optim = nn.AdaSumByGradWrapCell(nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9))
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
@@ -514,7 +514,7 @@ class AdaSumByDeltaWeightWrapCell(Cell):
  >>> import mindspore as ms
  >>> from mindspore import nn
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> optim = nn.AdaSumByDeltaWeightWrapCell(nn.Momentum(params=net.trainable_params(),
  ... learning_rate=0.1, momentum=0.9))
mindspore/nn/optim/asgd.py CHANGED
@@ -128,7 +128,7 @@ class ASGD(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.ASGD(params=net.trainable_params())
@@ -185,8 +185,7 @@ class ASGD(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lrs = self.get_lr()
- if not self._is_dynamic_lr_or_weight_decay():
- self.assignadd(self.global_step, self.global_step_increase_tensor)
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  success = True
  params = self._parameters
  for index, (grad, param, mu, eta, ax) in enumerate(zip(gradients, params, self.mu, self.eta, self.ax)):
mindspore/nn/optim/ftrl.py CHANGED
@@ -296,7 +296,7 @@ class FTRL(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.FTRL(params=net.trainable_params())
@@ -359,6 +359,7 @@ class FTRL(Optimizer):
  grads = self.scale_grad(grads)
  grads = self._grad_sparse_indices_deduplicate(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  if self.use_dist_optimizer:
  success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self._ps_push, self._ps_pull,
@@ -379,12 +380,12 @@ class FTRL(Optimizer):
  optimizer operation.
  """
  if not isinstance(value, str):
- raise TypeError("For 'FTRL', the property 'target' must be string type, "
- "but got type {}.".format(type(value)))
+ raise TypeError(f"For 'FTRL', the property 'target' must be string type, "
+ f"but got type {type(value)}.")

  if value not in ('CPU', 'Ascend', 'GPU'):
- raise ValueError("For 'FTRL', the property 'target' must be 'CPU', 'Ascend' or 'GPU', "
- "but got {}".format(value))
+ raise ValueError(f"For 'FTRL', the property 'target' must be 'CPU', 'Ascend' or 'GPU', "
+ f"but got {value}.")

  if value == 'CPU':
  self.sparse_opt = P.FusedSparseFtrl(self.lr, self.l1, self.l2, self.lr_power, self.use_locking)
mindspore/nn/optim/lamb.py CHANGED
@@ -132,7 +132,7 @@ class Lamb(Optimizer):
  There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
  As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means. Refer
- document `LossScale <https://www.mindspore.cn/tutorials/en/r2.1/advanced/mixed_precision.html>`_ to
+ document `LossScale <https://www.mindspore.cn/tutorials/en/r2.2/advanced/mixed_precision.html>`_ to
  process `loss_scale` correctly.

  If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
@@ -140,6 +140,10 @@ class Lamb(Optimizer):
  parameters are grouped, each group can set `weight_decay`. If not, the `weight_decay` in optimizer will be
  applied.

+ .. warning::
+ The update process of the Lamb optimizer is not completely elementwise, and the sharding of weights in
+ distributed parallel may affect the update result.
+
  Args:
  params (Union[list[Parameter], list[dict]]): Must be list of `Parameter` or list of `dict`. When the
  `params` is a list of `dict`, the string "params", "lr", "weight_decay", "grad_centralization" and
@@ -220,7 +224,7 @@ class Lamb(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Lamb(params=net.trainable_params(), learning_rate=0.1)
@@ -263,8 +267,7 @@ class Lamb(Optimizer):
  def construct(self, gradients):
  weight_decay = self.get_weight_decay()
  lr = self.get_lr()
- if not self._is_dynamic_lr_or_weight_decay():
- self.assignadd(self.global_step, self.global_step_increase_tensor)
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  lamb_opt = _lamb_opt
  gradients = self.flatten_gradients(gradients)
  gradients = self.gradients_centralization(gradients)
mindspore/nn/optim/lars.py CHANGED
@@ -109,7 +109,7 @@ class LARS(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
  >>> opt = nn.Momentum(net.trainable_params(), 0.1, 0.9)
mindspore/nn/optim/lazyadam.py CHANGED
@@ -321,7 +321,7 @@ class LazyAdam(Optimizer):
  If `order_params` in the keys, other keys will be ignored and the element of 'order_params' must be in
  one group of `params`.

- learning_rate (Union[float, int, Tensor, Iterable, LearningRateSchedule]): Default: ``1e-3`` .
+ learning_rate (Union[float, int, Tensor, Iterable, :class:`~.train.LearningRateScheduler`]): Default: ``1e-3`` .

  - float: The fixed learning rate value. Must be equal to or greater than 0.

@@ -370,7 +370,8 @@ class LazyAdam(Optimizer):
  Tensor[bool], the value is ``True`` .

  Raises:
- TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
+ TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable,
+ :class:`~.train.LearningRateScheduler`.
  TypeError: If element of `parameters` is neither Parameter nor dict.
  TypeError: If `beta1`, `beta2`, `eps` or `loss_scale` is not a float.
  TypeError: If `weight_decay` is neither float nor int.
@@ -387,7 +388,7 @@ class LazyAdam(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.LazyAdam(params=net.trainable_params())
@@ -445,6 +446,7 @@ class LazyAdam(Optimizer):
  gradients = self.scale_grad(gradients)
  gradients = self._grad_sparse_indices_deduplicate(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)

  beta1_power = self.beta1_power * self.beta1
  self.beta1_power = beta1_power
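Note: the LazyAdam hunk above lists the kinds of values `learning_rate` accepts (a fixed number, a Tensor, an Iterable, or a learning-rate schedule object); only the docstring cross-reference changes in this release. A hedged illustration of the fixed-value and schedule cases, assuming `nn.ExponentialDecayLR` as one example of a schedule class:

    from mindspore import nn

    net = nn.Dense(2, 3)
    # Fixed learning rate: a plain float.
    fixed = nn.LazyAdam(net.trainable_params(), learning_rate=1e-3)
    # Dynamic learning rate: a schedule object evaluated against the global step.
    schedule = nn.ExponentialDecayLR(learning_rate=1e-3, decay_rate=0.9, decay_steps=100)
    dynamic = nn.LazyAdam(net.trainable_params(), learning_rate=schedule)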
mindspore/nn/optim/momentum.py CHANGED
@@ -173,7 +173,7 @@ class Momentum(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
@@ -220,6 +220,7 @@ class Momentum(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.use_dist_optimizer:
  if self.is_group_lr:
  success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum),
mindspore/nn/optim/optimizer.py CHANGED
@@ -140,6 +140,57 @@ class Optimizer(Cell):

  Supported Platforms:
  ``Ascend`` ``GPU`` ``CPU``
+
+ Examples:
+ >>> import mindspore as ms
+ >>> from mindspore import nn
+ >>> import numpy as np
+ >>> import mindspore
+ >>> from mindspore import nn, ops, Tensor
+ >>>
+ >>> class MyMomentum(nn.Optimizer):
+ ... def __init__(self, params, learning_rate, momentum=0.9):
+ ... super(MyMomentum, self).__init__(learning_rate, params)
+ ... self.moments = self.parameters.clone(prefix="moments", init="zeros")
+ ... self.momentum = momentum
+ ... self.opt = ops.ApplyMomentum()
+ ...
+ ... def construct(self, gradients):
+ ... params = self.parameters
+ ... lr = self.get_lr()
+ ... gradients = self.flatten_gradients(gradients)
+ ... gradients = self.decay_weight(gradients)
+ ... gradients = self.gradients_centralization(gradients)
+ ... gradients = self.scale_grad(gradients)
+ ...
+ ... success = None
+ ... for param, mom, grad in zip(params, self.moments, gradients):
+ ... success = self.opt(param, mom, lr, grad, self.momentum)
+ ... return success
+ >>>
+ >>> net = nn.Dense(2, 3)
+ >>> loss_fn = nn.MAELoss()
+ >>> opt = MyMomentum(net.trainable_params(), 0.01)
+ >>>
+ >>> device_target = opt.target
+ >>> opt_unique = opt.unique
+ >>> weight_decay_value = opt.get_weight_decay()
+ >>>
+ >>> def forward_fn(data, label):
+ ... logits = net(data)
+ ... loss = loss_fn(logits, label)
+ ... return loss, logits
+ >>>
+ >>> grad_fn = mindspore.value_and_grad(forward_fn, None, opt.parameters, has_aux=True)
+ >>>
+ >>> def train_step(data, label):
+ ... (loss, _), grads = grad_fn(data, label)
+ ... opt(grads)
+ ... return loss
+ >>>
+ >>> data = Tensor(np.random.rand(4, 10, 2), mindspore.dtype.float32)
+ >>> label = Tensor(np.random.rand(4, 10, 3), mindspore.dtype.float32)
+ >>> train_step(data, label)
  """
  _support_parallel_optimizer = False

@@ -233,7 +284,7 @@ class Optimizer(Cell):
  self.cache_enable = tuple(cache_filter(x) for x in self._parameters)
  self.reciprocal_scale = Tensor(1.0 / self.loss_scale, mstype.float32)
  self.need_scale = self.loss_scale != 1.0
- self.global_step_increase_tensor = Tensor(1, mstype.int32)
+ self.global_step_increase_tensor = Tensor([1], mstype.int32)
  self.param_length = len(self._parameters)
  self.map_ = C.Map()
  self.map_reverse = C.Map(None, True)
@@ -702,8 +753,6 @@ class Optimizer(Cell):
  lr += (current_dynamic_lr,)
  else:
  lr = self.learning_rate(self.global_step).reshape(())
- if self._is_dynamic_lr_or_weight_decay():
- self.assignadd(self.global_step, self.global_step_increase_tensor)
  return lr

  def get_lr_parameter(self, param):
@@ -722,7 +771,7 @@ class Optimizer(Cell):
  Examples:
  >>> from mindspore import nn
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
  >>> no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
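Note: taken together, the optimizer.py hunks above and the recurring `+ self.assignadd(self.global_step, self.global_step_increase_tensor)` lines throughout this diff change how `global_step` is maintained. `get_lr()` no longer increments it only when the learning rate or weight decay is dynamic; each optimizer's `construct` now performs one unconditional `AssignAdd` per step, and the increment value becomes a one-element int32 tensor instead of a scalar. A minimal, standalone sketch of that bookkeeping (plain MindSpore ops, not the actual `Optimizer` code):

    import mindspore as ms
    from mindspore import Parameter, Tensor, ops

    # Illustrative only: the per-step AssignAdd the updated optimizers run once
    # per construct() call, independent of whether the learning rate is dynamic.
    global_step = Parameter(Tensor([0], ms.int32), name="global_step")
    increase = Tensor([1], ms.int32)   # one-element tensor, matching the new Tensor([1], mstype.int32)
    assign_add = ops.AssignAdd()

    for _ in range(3):                 # pretend three training steps ran
        assign_add(global_step, increase)
    print(global_step.asnumpy())       # [3]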
mindspore/nn/optim/proximal_ada_grad.py CHANGED
@@ -55,9 +55,7 @@ def _check_param_value(accum, l1, l2, use_locking, prim_name=None):

  class ProximalAdagrad(Optimizer):
  r"""
- Implements the ProximalAdagrad algorithm.
-
- ProximalAdagrad is an online Learning and Stochastic Optimization.
+ Implements the ProximalAdagrad algorithm that is an online Learning and Stochastic Optimization.
  Refer to paper `Efficient Learning using Forward-Backward Splitting
  <http://papers.nips.cc//paper/3793-efficient-learning-using-forward-backward-splitting.pdf>`_.

@@ -165,7 +163,7 @@ class ProximalAdagrad(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.ProximalAdagrad(params=net.trainable_params())
@@ -209,6 +207,7 @@ class ProximalAdagrad(Optimizer):
  grads = self.scale_grad(grads)
  grads = self._grad_sparse_indices_deduplicate(grads)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
  success = self.map_reverse(F.partial(_proximal_ada_grad_opt, self.opt, self.sparse_opt, self.l1, self.l2),
  lr, grads, params, accum)
mindspore/nn/optim/rmsprop.py CHANGED
@@ -47,8 +47,8 @@ class RMSProp(Optimizer):
  Implements Root Mean Squared Propagation (RMSProp) algorithm.

  Update `params` according to the RMSProp algorithm.
- The 29th of the original presentation slide
- [http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf] proposes RMSProp.
+ The 29th of the original `presentation slide
+ <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_ proposes RMSProp.
  The equation is as follows:

  .. math::
@@ -180,7 +180,7 @@ class RMSProp(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.RMSProp(params=net.trainable_params(), learning_rate=0.1)
@@ -236,6 +236,7 @@ class RMSProp(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.centered:
  if self.is_group_lr:
  success = self.hyper_map_reverse(F.partial(_centered_rmsprop_opt, self.opt, self.decay, self.epsilon,
mindspore/nn/optim/rprop.py CHANGED
@@ -135,7 +135,7 @@ class Rprop(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.Rprop(params=net.trainable_params())
@@ -189,8 +189,8 @@ class Rprop(Optimizer):
  self.prev = self._parameters.clone(prefix="prev", init='zeros')
  self.step_size = self._parameters.clone(prefix="step_size", init='zeros')

- self.fill = P.Fill()
  self.sign = P.Sign()
+ self.fill = P.FillV2()
  self.assign = P.Assign()
  self.assignadd = P.AssignAdd()
  self.cast = P.Cast()
@@ -204,8 +204,7 @@ class Rprop(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lrs = self.get_lr()
- if not self._is_dynamic_lr_or_weight_decay():
- self.assignadd(self.global_step, self.global_step_increase_tensor)
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  success = True

  for index, (grad, param, prev, step_size) in enumerate(zip(gradients, self._parameters,
@@ -221,14 +220,26 @@ class Rprop(Optimizer):
  param_fp32 = self.cast(param, mstype.float32)

  sign = self.sign(gradient_fp32 * prev)
- sign = self.select(sign > 0, self.fill(mstype.float32, sign.shape, self.etaplus), sign)
- sign = self.select(sign < 0, self.fill(mstype.float32, sign.shape, self.etaminus), sign)
- sign = self.select(sign == 0, self.fill(mstype.float32, sign.shape, 1.), sign)
-
- step_size_fp32 = ops.clip_by_value(step_size_fp32 * sign, self.step_size_min, self.step_size_max)
-
- gradient_update = self.select(sign == self.etaminus, self.fill(mstype.float32, sign.shape, 0.),
- gradient_fp32)
+ sign = self.select(
+ sign > 0,
+ self.fill(sign.shape, self.cast(self.etaplus, mstype.float32)),
+ sign)
+ sign = self.select(
+ sign < 0,
+ self.fill(sign.shape, self.cast(self.etaminus,
+ mstype.float32)), sign)
+ sign = self.select(
+ sign == 0, self.fill(sign.shape,
+ self.cast(1., mstype.float32)), sign)
+
+ step_size_fp32 = ops.clip_by_value(step_size_fp32 * sign,
+ self.step_size_min,
+ self.step_size_max)
+
+ gradient_update = self.select(
+ sign == self.etaminus,
+ self.fill(sign.shape, self.cast(0., mstype.float32)),
+ gradient_fp32)
  next_param = param_fp32 - self.sign(gradient_update) * step_size_fp32

  self.assign(param, self.cast(next_param, param.dtype))
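Note on the `P.Fill()` to `P.FillV2()` swap in the Rprop hunks above: as the rewritten calls show, `FillV2` takes the target shape plus the fill value as a tensor (hence the added `self.cast(...)` wrappers), rather than an explicit dtype, shape, and Python scalar. A minimal sketch outside the Rprop code, assuming the usual `ops.FillV2` behavior where the value tensor's dtype determines the output dtype:

    import mindspore as ms
    from mindspore import Tensor, ops

    fill_v2 = ops.FillV2()
    # Fill a (2, 3) tensor with 1.5; the 0-D value tensor carries the dtype,
    # so no separate dtype argument is passed (unlike the removed P.Fill()).
    filled = fill_v2((2, 3), Tensor(1.5, ms.float32))
    print(filled.shape, filled.dtype)   # (2, 3) Float32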
mindspore/nn/optim/sgd.py CHANGED
@@ -132,7 +132,7 @@ class SGD(Optimizer):
  >>> from mindspore import nn
  >>>
  >>> # Define the network structure of LeNet5. Refer to
- >>> # https://gitee.com/mindspore/docs/blob/r2.1/docs/mindspore/code/lenet.py
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
  >>> net = LeNet5()
  >>> #1) All parameters use the same learning rate and weight decay
  >>> optim = nn.SGD(params=net.trainable_params())
@@ -163,29 +163,29 @@ class SGD(Optimizer):
  if isinstance(momentum, int):
  momentum = float(momentum)
  if not isinstance(momentum, float):
- raise TypeError("For 'SGD', the argument 'momentum' must be float type, "
- "but got {}.".format(type(momentum)))
+ raise TypeError(f"For 'SGD', the argument 'momentum' must be float type, "
+ f"but got {type(momentum)}.")

  if isinstance(momentum, float) and momentum < 0.0:
- raise ValueError("For 'SGD', the argument 'momentum' must be at least 0.0, "
- "but got {}.".format(momentum))
+ raise ValueError(f"For 'SGD', the argument 'momentum' must be at least 0.0, "
+ f"but got {momentum}.")

  if isinstance(dampening, int):
  dampening = float(dampening)
  if not isinstance(dampening, float):
- raise TypeError("For 'SGD', the argument 'dampening' must be float type, "
- "but got {}.".format(type(dampening)))
+ raise TypeError(f"For 'SGD', the argument 'dampening' must be float type, "
+ f"but got {type(dampening)}.")

  if dampening < 0.0:
- raise ValueError("For 'SGD', the argument 'dampening' must be at least 0.0, "
- "but got 'dampening' {}".format(dampening))
+ raise ValueError(f"For 'SGD', the argument 'dampening' must be at least 0.0, "
+ f"but got 'dampening' {dampening}")
  self.dampening = dampening

  validator.check_value_type("nesterov", nesterov, [bool], self.cls_name)

  if nesterov and (momentum <= 0.0 or dampening != 0.0):
- raise ValueError("For 'SGD', if 'nesterov' is true, 'momentum' must be > 0.0 and 'dampening' must "
- "equal to 0.0, but got 'momentum' {}, 'dampening' {}".format(momentum, dampening))
+ raise ValueError(f"For 'SGD', if 'nesterov' is true, 'momentum' must be > 0.0 and 'dampening' must "
+ f"equal to 0.0, but got 'momentum' {momentum}, 'dampening' {dampening}.")
  self.nesterov = nesterov

  if self.dynamic_weight_decay:
@@ -198,9 +198,23 @@ class SGD(Optimizer):
  self.opt = tuple([P.SGD(dampening, float(weight_decay), nesterov)] * len(self._parameters))

  self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
+
+ if not momentum > 0.0:
+ enable_cache_param_list = []
+ for param in self._parameters:
+ if param.cache_enable:
+ enable_cache_param_list.append(param)
+ param.cache_enable = False
+
  self.accum = self._parameters.clone(prefix="accum", init='zeros')
  self.stat = self._parameters.clone(prefix="stat", init='ones')

+
+ if not momentum > 0.0:
+ for param in enable_cache_param_list:
+ param.cache_enable = True
+
+
  @jit
  def construct(self, gradients):
  params = self._parameters
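Note: the `@@ -198,9 +198,23 @@` hunk above brackets the `accum`/`stat` cloning with a save/clear/restore of each parameter's `cache_enable` flag when `momentum` is not positive. A toy, MindSpore-free sketch of that idiom (the `Param` class is a hypothetical stand-in, not the real Parameter API):

    class Param:
        """Hypothetical stand-in for a parameter carrying a cache_enable flag."""
        def __init__(self, cache_enable):
            self.cache_enable = cache_enable

    params = [Param(True), Param(False), Param(True)]

    # Remember which parameters had the flag set, then clear it ...
    enabled = [p for p in params if p.cache_enable]
    for p in enabled:
        p.cache_enable = False

    # ... run the work that must not see the flag (cloning accum/stat in the diff) ...

    # ... then restore the original flags.
    for p in enabled:
        p.cache_enable = True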
@@ -210,6 +224,7 @@ class SGD(Optimizer):
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
+ self.assignadd(self.global_step, self.global_step_increase_tensor)
  if self.is_group_lr:
  success = self.hyper_map_reverse(F.partial(_sgd_opt, self.momentum),
  lr, gradients, params, accum, stat, self.opt)