mindspore 2.7.0-cp310-cp310-win_amd64.whl → 2.7.1-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (290)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_extends/parse/compile_config.py +24 -1
  7. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -2
  8. mindspore/_extends/parse/resources.py +1 -1
  9. mindspore/_extends/parse/standard_method.py +8 -1
  10. mindspore/_extends/parse/trope.py +2 -1
  11. mindspore/_extends/pijit/pijit_func_white_list.py +7 -22
  12. mindspore/avcodec-59.dll +0 -0
  13. mindspore/avdevice-59.dll +0 -0
  14. mindspore/avfilter-8.dll +0 -0
  15. mindspore/avformat-59.dll +0 -0
  16. mindspore/avutil-57.dll +0 -0
  17. mindspore/boost/base.py +29 -2
  18. mindspore/common/_decorator.py +3 -2
  19. mindspore/common/_grad_function.py +3 -1
  20. mindspore/common/_tensor_cpp_method.py +1 -1
  21. mindspore/common/_tensor_docs.py +275 -64
  22. mindspore/common/_utils.py +0 -44
  23. mindspore/common/api.py +285 -35
  24. mindspore/common/dump.py +7 -108
  25. mindspore/common/dynamic_shape/auto_dynamic_shape.py +1 -3
  26. mindspore/common/hook_handle.py +60 -0
  27. mindspore/common/jit_config.py +5 -1
  28. mindspore/common/jit_trace.py +27 -12
  29. mindspore/common/lazy_inline.py +5 -3
  30. mindspore/common/parameter.py +13 -107
  31. mindspore/common/recompute.py +4 -11
  32. mindspore/common/tensor.py +16 -169
  33. mindspore/communication/_comm_helper.py +11 -1
  34. mindspore/communication/comm_func.py +138 -4
  35. mindspore/communication/management.py +85 -1
  36. mindspore/config/op_info.config +0 -15
  37. mindspore/context.py +5 -85
  38. mindspore/dataset/engine/datasets.py +8 -4
  39. mindspore/dataset/engine/datasets_vision.py +1 -1
  40. mindspore/dataset/engine/validators.py +1 -15
  41. mindspore/dnnl.dll +0 -0
  42. mindspore/{experimental/llm_boost/ascend_native → graph}/__init__.py +7 -7
  43. mindspore/graph/custom_pass.py +55 -0
  44. mindspore/include/dataset/execute.h +2 -2
  45. mindspore/jpeg62.dll +0 -0
  46. mindspore/mindrecord/__init__.py +3 -3
  47. mindspore/mindrecord/common/exceptions.py +1 -0
  48. mindspore/mindrecord/config.py +1 -1
  49. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  50. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  51. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  52. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  53. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  54. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  55. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  56. mindspore/mindrecord/filereader.py +4 -4
  57. mindspore/mindrecord/filewriter.py +5 -5
  58. mindspore/mindrecord/mindpage.py +2 -2
  59. mindspore/mindrecord/tools/cifar10.py +1 -1
  60. mindspore/mindrecord/tools/cifar100.py +1 -1
  61. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  62. mindspore/mindrecord/tools/cifar10_to_mr.py +1 -1
  63. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  64. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  65. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  66. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  67. mindspore/mindspore_backend_common.dll +0 -0
  68. mindspore/mindspore_backend_manager.dll +0 -0
  69. mindspore/mindspore_cluster.dll +0 -0
  70. mindspore/mindspore_common.dll +0 -0
  71. mindspore/mindspore_core.dll +0 -0
  72. mindspore/mindspore_cpu.dll +0 -0
  73. mindspore/mindspore_dump.dll +0 -0
  74. mindspore/mindspore_frontend.dll +0 -0
  75. mindspore/mindspore_glog.dll +0 -0
  76. mindspore/mindspore_hardware_abstract.dll +0 -0
  77. mindspore/mindspore_memory_pool.dll +0 -0
  78. mindspore/mindspore_ms_backend.dll +0 -0
  79. mindspore/mindspore_ops.dll +0 -0
  80. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  81. mindspore/mindspore_profiler.dll +0 -0
  82. mindspore/mindspore_pyboost.dll +0 -0
  83. mindspore/mindspore_pynative.dll +0 -0
  84. mindspore/mindspore_runtime_pipeline.dll +0 -0
  85. mindspore/mindspore_runtime_utils.dll +0 -0
  86. mindspore/mindspore_tools.dll +0 -0
  87. mindspore/mint/__init__.py +15 -10
  88. mindspore/mint/distributed/distributed.py +182 -62
  89. mindspore/mint/nn/__init__.py +2 -16
  90. mindspore/mint/nn/functional.py +4 -110
  91. mindspore/mint/nn/layer/__init__.py +0 -2
  92. mindspore/mint/nn/layer/activation.py +0 -6
  93. mindspore/mint/nn/layer/basic.py +0 -47
  94. mindspore/mint/nn/layer/conv.py +4 -4
  95. mindspore/mint/nn/layer/normalization.py +8 -13
  96. mindspore/mint/nn/layer/pooling.py +0 -4
  97. mindspore/nn/__init__.py +1 -3
  98. mindspore/nn/cell.py +16 -66
  99. mindspore/nn/layer/basic.py +49 -1
  100. mindspore/nn/layer/container.py +16 -0
  101. mindspore/nn/layer/embedding.py +4 -169
  102. mindspore/nn/layer/normalization.py +2 -1
  103. mindspore/nn/layer/thor_layer.py +4 -85
  104. mindspore/nn/optim/ada_grad.py +0 -1
  105. mindspore/nn/optim/adafactor.py +0 -1
  106. mindspore/nn/optim/adam.py +31 -124
  107. mindspore/nn/optim/adamax.py +0 -1
  108. mindspore/nn/optim/asgd.py +0 -1
  109. mindspore/nn/optim/ftrl.py +8 -102
  110. mindspore/nn/optim/lamb.py +0 -1
  111. mindspore/nn/optim/lars.py +0 -3
  112. mindspore/nn/optim/lazyadam.py +25 -218
  113. mindspore/nn/optim/momentum.py +5 -43
  114. mindspore/nn/optim/optimizer.py +6 -55
  115. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  116. mindspore/nn/optim/rmsprop.py +0 -1
  117. mindspore/nn/optim/rprop.py +0 -1
  118. mindspore/nn/optim/sgd.py +0 -1
  119. mindspore/nn/optim/tft_wrapper.py +0 -1
  120. mindspore/nn/optim/thor.py +0 -2
  121. mindspore/nn/probability/bijector/bijector.py +7 -8
  122. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  123. mindspore/nn/probability/bijector/power_transform.py +20 -21
  124. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  125. mindspore/nn/probability/bijector/softplus.py +13 -14
  126. mindspore/nn/wrap/grad_reducer.py +4 -74
  127. mindspore/numpy/array_creations.py +2 -2
  128. mindspore/numpy/fft.py +9 -9
  129. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  130. mindspore/onnx/onnx_export.py +137 -0
  131. mindspore/opencv_core4110.dll +0 -0
  132. mindspore/opencv_imgcodecs4110.dll +0 -0
  133. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  134. mindspore/ops/__init__.py +2 -0
  135. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  136. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  137. mindspore/ops/_op_impl/cpu/__init__.py +0 -5
  138. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +16 -22
  139. mindspore/ops/auto_generate/gen_extend_func.py +2 -7
  140. mindspore/ops/auto_generate/gen_ops_def.py +98 -141
  141. mindspore/ops/auto_generate/gen_ops_prim.py +12708 -12686
  142. mindspore/ops/communication.py +97 -0
  143. mindspore/ops/composite/__init__.py +5 -2
  144. mindspore/ops/composite/base.py +15 -1
  145. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  146. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  147. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  148. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  149. mindspore/ops/function/__init__.py +1 -0
  150. mindspore/ops/function/array_func.py +14 -12
  151. mindspore/ops/function/comm_func.py +3883 -0
  152. mindspore/ops/function/debug_func.py +3 -4
  153. mindspore/ops/function/math_func.py +45 -54
  154. mindspore/ops/function/nn_func.py +75 -294
  155. mindspore/ops/function/random_func.py +9 -18
  156. mindspore/ops/functional.py +2 -0
  157. mindspore/ops/functional_overload.py +354 -18
  158. mindspore/ops/operations/__init__.py +2 -5
  159. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  160. mindspore/ops/operations/_inner_ops.py +1 -38
  161. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  162. mindspore/ops/operations/array_ops.py +1 -0
  163. mindspore/ops/operations/comm_ops.py +94 -2
  164. mindspore/ops/operations/custom_ops.py +228 -19
  165. mindspore/ops/operations/debug_ops.py +27 -29
  166. mindspore/ops/operations/manually_defined/ops_def.py +27 -306
  167. mindspore/ops/operations/nn_ops.py +2 -2
  168. mindspore/ops/operations/sparse_ops.py +0 -83
  169. mindspore/ops/primitive.py +1 -17
  170. mindspore/ops/tensor_method.py +72 -3
  171. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  172. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  173. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  174. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  175. mindspore/ops_generate/common/gen_constants.py +11 -10
  176. mindspore/ops_generate/common/op_proto.py +18 -1
  177. mindspore/ops_generate/common/template.py +102 -245
  178. mindspore/ops_generate/common/template_utils.py +212 -0
  179. mindspore/ops_generate/gen_custom_ops.py +69 -0
  180. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  181. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  182. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  183. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  184. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  185. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  186. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  187. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  188. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  189. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  190. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  191. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  192. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  193. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  194. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  195. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  196. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  197. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  198. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  199. mindspore/parallel/_cell_wrapper.py +1 -1
  200. mindspore/parallel/_parallel_serialization.py +1 -4
  201. mindspore/parallel/_utils.py +29 -6
  202. mindspore/parallel/checkpoint_transform.py +18 -2
  203. mindspore/parallel/cluster/process_entity/_api.py +24 -32
  204. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  205. mindspore/{experimental/llm_boost/atb → parallel/distributed}/__init__.py +21 -23
  206. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  207. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  208. mindspore/parallel/strategy.py +336 -0
  209. mindspore/parallel/transform_safetensors.py +117 -16
  210. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +3 -0
  211. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  212. mindspore/profiler/common/constant.py +5 -0
  213. mindspore/profiler/common/file_manager.py +9 -0
  214. mindspore/profiler/common/msprof_cmd_tool.py +38 -2
  215. mindspore/profiler/common/path_manager.py +56 -24
  216. mindspore/profiler/common/profiler_context.py +2 -12
  217. mindspore/profiler/common/profiler_info.py +3 -3
  218. mindspore/profiler/common/profiler_path_manager.py +13 -0
  219. mindspore/profiler/common/util.py +30 -3
  220. mindspore/profiler/experimental_config.py +2 -1
  221. mindspore/profiler/platform/npu_profiler.py +33 -6
  222. mindspore/run_check/_check_version.py +108 -24
  223. mindspore/runtime/__init__.py +3 -2
  224. mindspore/runtime/executor.py +11 -3
  225. mindspore/runtime/memory.py +112 -0
  226. mindspore/swresample-4.dll +0 -0
  227. mindspore/swscale-6.dll +0 -0
  228. mindspore/tinyxml2.dll +0 -0
  229. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  230. mindspore/tools/data_dump.py +130 -0
  231. mindspore/tools/sdc_detect.py +91 -0
  232. mindspore/tools/stress_detect.py +63 -0
  233. mindspore/train/__init__.py +6 -6
  234. mindspore/train/_utils.py +5 -18
  235. mindspore/train/amp.py +6 -4
  236. mindspore/train/callback/_checkpoint.py +0 -9
  237. mindspore/train/callback/_train_fault_tolerance.py +69 -18
  238. mindspore/train/data_sink.py +1 -5
  239. mindspore/train/model.py +38 -211
  240. mindspore/train/serialization.py +126 -387
  241. mindspore/turbojpeg.dll +0 -0
  242. mindspore/utils/__init__.py +6 -3
  243. mindspore/utils/dlpack.py +92 -0
  244. mindspore/utils/dryrun.py +1 -1
  245. mindspore/utils/runtime_execution_order_check.py +10 -0
  246. mindspore/utils/sdc_detect.py +14 -12
  247. mindspore/utils/stress_detect.py +43 -0
  248. mindspore/utils/utils.py +144 -8
  249. mindspore/version.py +1 -1
  250. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  251. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/RECORD +254 -267
  252. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -210
  253. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  254. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  255. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  256. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  257. mindspore/experimental/llm_boost/register.py +0 -130
  258. mindspore/experimental/llm_boost/utils.py +0 -31
  259. mindspore/include/OWNERS +0 -7
  260. mindspore/mindspore_cpu_res_manager.dll +0 -0
  261. mindspore/mindspore_ops_kernel_common.dll +0 -0
  262. mindspore/mindspore_res_manager.dll +0 -0
  263. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  264. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  265. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  266. mindspore/nn/reinforcement/tensor_array.py +0 -145
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  270. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  271. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  272. mindspore/ops/_op_impl/cpu/buffer_append.py +0 -28
  273. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  274. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  275. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  276. mindspore/ops/operations/_tensor_array.py +0 -359
  277. mindspore/ops/operations/rl_ops.py +0 -288
  278. mindspore/parallel/_offload_context.py +0 -275
  279. mindspore/parallel/_recovery_context.py +0 -115
  280. mindspore/parallel/_transformer/__init__.py +0 -35
  281. mindspore/parallel/_transformer/layers.py +0 -765
  282. mindspore/parallel/_transformer/loss.py +0 -251
  283. mindspore/parallel/_transformer/moe.py +0 -693
  284. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  285. mindspore/parallel/_transformer/transformer.py +0 -3124
  286. mindspore/parallel/mpi/_mpi_config.py +0 -116
  287. mindspore/train/memory_profiling_pb2.py +0 -298
  288. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  289. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  290. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
mindspore/nn/optim/lazyadam.py CHANGED
@@ -26,34 +26,19 @@ from mindspore.common.tensor import Tensor
  from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
- from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
  from mindspore.common._decorator import deprecated

  _lazy_adam_opt = C.MultitypeFuncGraph("lazy_adam_opt")


- @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
-                          "Tensor", "Tensor", "Tensor", "Tensor", "RowTensor", "Tensor", "Tensor", "Tensor", "Bool",
-                          "Bool", "Function", "Bool", "Function", "Bool")
- def _run_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power,
-                               beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter, cache_enable,
-                               distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
+ @_lazy_adam_opt.register("Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
+                          "Tensor", "Tensor", "Tensor", "Tensor", "RowTensor", "Tensor", "Tensor", "Tensor")
+ def _run_opt_with_sparse(opt, sparse_opt, use_locking, use_nesterov, target, beta1_power, beta2_power,
+                          beta1, beta2, eps, lr, gradient, params, m, v):
      """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
      success = True
      indices = gradient.indices
      values = gradient.values
-     if use_sparse_flag:
-         success = F.depend(success, distributed_sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
-                                                            eps, values, indices))
-         return success
-     if ps_parameter and not cache_enable:
-         op_shape = P.Shape()
-         shapes = (op_shape(params), op_shape(m), op_shape(v),
-                   op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
-                   op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
-         success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
-                                                eps, values, indices), shapes), params))
-         return success

      if not target:
          success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
@@ -85,122 +70,10 @@ def _run_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use_nest
      return success


- @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
-                          "Tensor", "Tensor", "Tensor", "Tensor", "MapTensor", "MapTensor", "MapTensor", "MapTensor",
-                          "Bool", "Bool", "Function", "Bool", "Function", "Bool")
- def _run_map_tensor_opt_with_sparse_dist(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,
-                                          beta2_power, beta1, beta2, eps, lr, gradient, params, m, v,
-                                          ps_parameter, cache_enable, distributed_opt, use_flag, distributed_sparse_opt,
-                                          use_sparse_flag):
-     """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
-     success = True
-     indices, values = gradient.get_data()
-     if use_sparse_flag:
-         # PS Mode.
-         success = F.depend(success, distributed_sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
-                                                            eps, values, indices))
-     else:
-         # PS Cache mode.
-         op_sqrt = P.Sqrt()
-
-         m_slice = m.get(indices)
-         v_slice = v.get(indices)
-
-         next_m = m_slice * beta1 + values * (1 - beta1)
-         next_v = v_slice * beta2 + values * values * (1 - beta2)
-
-         lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
-
-         if use_nesterov:
-             m_temp = beta1 * next_m + values * (1 - beta1)
-             param_update = m_temp / (op_sqrt(next_v) + eps)
-         else:
-             param_update = next_m / (op_sqrt(next_v) + eps)
-
-         params_need_update = params.get(indices)
-         params.put(indices, params_need_update - lr_t * param_update)
-         m.put(indices, next_m)
-         v.put(indices, next_v)
-
-     return success
-
-
- @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
-                          "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool",
-                          "Function", "Bool", "Function", "Bool")
- def _run_opt_with_one_number_dist(opt, sparse_opt, push, pull, use_locking, use_nesterov, target,
-                                   beta1_power, beta2_power,
-                                   beta1, beta2, eps, lr, gradient, params, moment1, moment2, ps_parameter, cache_enable,
-                                   distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
-     """Apply lazy adam optimizer to the weight parameter using Tensor."""
-     success = True
-     if use_flag:
-         success = F.depend(success, distributed_opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1,
-                                                     beta2, eps, gradient))
-     elif ps_parameter and not cache_enable:
-         op_shape = P.Shape()
-         success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
-                                               (op_shape(params), op_shape(moment1), op_shape(moment2))), params))
-     else:
-         success = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
-                                         eps, gradient))
-     return success
-
-
- @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
-                          "Tensor", "Tensor", "Tensor", "Tensor", "RowTensor", "Tensor", "Tensor", "Tensor", "Bool",
-                          "Bool")
- def _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power,
-                          beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter, cache_enable):
-     """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
-     success = True
-     indices = gradient.indices
-     values = gradient.values
-     if ps_parameter and not cache_enable:
-         op_shape = P.Shape()
-         shapes = (op_shape(params), op_shape(m), op_shape(v),
-                   op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
-                   op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
-         success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
-                                                eps, values, indices), shapes), params))
-         return success
-
-     if not target:
-         success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
-                                                eps, values, indices))
-     else:
-         op_gather = P.Gather()
-         op_sqrt = P.Sqrt()
-         scatter_add = P.ScatterAdd(use_locking)
-         scatter_update = P.ScatterUpdate(use_locking)
-
-         m_slice = op_gather(m, indices, 0)
-         v_slice = op_gather(v, indices, 0)
-
-         next_m = m_slice * beta1 + values * (1 - beta1)
-         next_v = v_slice * beta2 + values * values * (1 - beta2)
-
-         lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
-
-         if use_nesterov:
-             m_temp = beta1 * next_m + values * (1 - beta1)
-             param_update = m_temp / (op_sqrt(next_v) + eps)
-         else:
-             param_update = next_m / (op_sqrt(next_v) + eps)
-
-         success = F.depend(success, scatter_add(params, indices, - lr_t * param_update))
-         success = F.depend(success, scatter_update(m, indices, next_m))
-         success = F.depend(success, scatter_update(v, indices, next_v))
-
-     return success
-
-
- @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
-                          "Tensor", "Tensor", "Tensor", "Tensor", "MapTensor", "MapTensor", "MapTensor", "MapTensor",
-                          "Bool", "Bool")
- def _run_map_tensor_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,
-                                     beta2_power, beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter,
-                                     cache_enable):
+ @_lazy_adam_opt.register("Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
+                          "Tensor", "Tensor", "Tensor", "Tensor", "MapTensor", "MapTensor", "MapTensor", "MapTensor")
+ def _run_map_tensor_opt_with_sparse(opt, sparse_opt, use_locking, use_nesterov, target, beta1_power,
+                                     beta2_power, beta1, beta2, eps, lr, gradient, params, m, v):
      """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse(MapTensor)."""
      success = True
      indices, values = gradient.get_data()
@@ -229,19 +102,14 @@ def _run_map_tensor_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, us
      return success


- @_lazy_adam_opt.register("Function", "Function", "Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
-                          "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool")
- def _run_opt_with_one_number(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power, beta2_power,
-                              beta1, beta2, eps, lr, gradient, params, moment1, moment2, ps_parameter, cache_enable):
+ @_lazy_adam_opt.register("Function", "Function", "Bool", "Bool", "Bool", "Tensor", "Tensor",
+                          "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
+ def _run_opt_with_one_number(opt, sparse_opt, use_locking, use_nesterov, target, beta1_power, beta2_power,
+                              beta1, beta2, eps, lr, gradient, params, moment1, moment2):
      """Apply lazy adam optimizer to the weight parameter using Tensor."""
      success = True
-     if ps_parameter and not cache_enable:
-         op_shape = P.Shape()
-         success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
-                                               (op_shape(params), op_shape(moment1), op_shape(moment2))), params))
-     else:
-         success = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
-                                         eps, gradient))
+     success = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
+                                     eps, gradient))
      return success


@@ -436,15 +304,9 @@ class LazyAdam(Optimizer):
          self.opt = P.Adam(use_locking, use_nesterov)
          self.sparse_opt = P.FusedSparseLazyAdam(use_locking, use_nesterov)
          self.sparse_opt.set_device("CPU")
-         self._ps_pull = P.Pull()
-         self._ps_push = P.Push("Adam", [0, 1, 2])
-         self._ps_push.add_prim_attr("use_nesterov", use_nesterov)
-
-         self._init_distributed_opts(use_locking, use_nesterov)

      @jit
      def construct(self, gradients):
-         gradients = self.flatten_gradients(gradients)
          gradients = self.decay_weight(gradients)
          gradients = self.gradients_centralization(gradients)
          gradients = self.scale_grad(gradients)
@@ -457,40 +319,18 @@ class LazyAdam(Optimizer):
          beta2_power = self.beta2_power * self.beta2
          self.beta2_power = beta2_power

-         if self.use_dist_optimizer:
-             if self.is_group_lr:
-                 success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
-                                                      self._ps_push, self._ps_pull, self.use_locking, self.use_nesterov,
-                                                      self._is_device, beta1_power, beta2_power,
-                                                      self.beta1, self.beta2, self.eps),
-                                            lr, gradients, self._parameters, self.moment1, self.moment2,
-                                            self.ps_parameters, self.cache_enable, self.dense_lazyadam_opts,
-                                            self.use_dense_opt_flags, self.sparse_lazyadam_opts,
-                                            self.use_sparse_opt_flags)
-             else:
-                 success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
-                                                      self._ps_push, self._ps_pull, self.use_locking, self.use_nesterov,
-                                                      self._is_device, beta1_power, beta2_power,
-                                                      self.beta1, self.beta2, self.eps, lr),
-                                            gradients, self._parameters, self.moment1, self.moment2,
-                                            self.ps_parameters, self.cache_enable, self.dense_lazyadam_opts,
-                                            self.use_dense_opt_flags, self.sparse_lazyadam_opts,
-                                            self.use_sparse_opt_flags)
+         if self.is_group_lr:
+             success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
+                                                  self.use_locking, self.use_nesterov,
+                                                  self._is_device, beta1_power, beta2_power,
+                                                  self.beta1, self.beta2, self.eps),
+                                        lr, gradients, self._parameters, self.moment1, self.moment2)
          else:
-             if self.is_group_lr:
-                 success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
-                                                      self._ps_push, self._ps_pull, self.use_locking, self.use_nesterov,
-                                                      self._is_device, beta1_power, beta2_power,
-                                                      self.beta1, self.beta2, self.eps),
-                                            lr, gradients, self._parameters, self.moment1, self.moment2,
-                                            self.ps_parameters, self.cache_enable)
-             else:
-                 success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
-                                                      self._ps_push, self._ps_pull, self.use_locking, self.use_nesterov,
-                                                      self._is_device, beta1_power, beta2_power,
-                                                      self.beta1, self.beta2, self.eps, lr),
-                                            gradients, self._parameters, self.moment1, self.moment2,
-                                            self.ps_parameters, self.cache_enable)
+             success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
+                                                  self.use_locking, self.use_nesterov,
+                                                  self._is_device, beta1_power, beta2_power,
+                                                  self.beta1, self.beta2, self.eps, lr),
+                                        gradients, self._parameters, self.moment1, self.moment2)
          return success

      @Optimizer.target.setter
@@ -500,36 +340,3 @@ class LazyAdam(Optimizer):
          optimizer operation.
          """
          self._set_base_target(value)
-
-     def _init_distributed_opts(self, use_locking, use_nesterov):
-         self.use_dist_optimizer = self._use_distibuted_optimizer()
-         self.dense_lazyadam_opts, self.use_dense_opt_flags =\
-             self._get_distributed_optimizer_list("adam", use_locking, use_nesterov)
-         self.sparse_lazyadam_opts, self.use_sparse_opt_flags =\
-             self._get_distributed_optimizer_list("fused_sparse_lazy_adam", use_locking, use_nesterov)
-
-
- def create_distributed_adam(*args, **kwargs):
-     """
-     Create the distributed Adam op.
-     """
-     adam = P.Adam(*args, **kwargs)
-     adam.add_prim_attr("gradient_type", "dense_gradient")
-     adam.add_prim_attr("parameter_input_index", 0)
-     adam.add_prim_attr("gradient_input_index", 9)
-     return adam
-
-
- def create_distributed_fused_sparse_lazy_adam(*args, **kwargs):
-     """
-     Create the distributed FusedSparseLazyAdam op.
-     """
-     sparse_lazy_adam = P.FusedSparseLazyAdam(*args, **kwargs)
-     sparse_lazy_adam.add_prim_attr("gradient_type", "sparse_gradient")
-     sparse_lazy_adam.add_prim_attr("parameter_input_index", 0)
-     sparse_lazy_adam.add_prim_attr("gradient_input_index", 9)
-     sparse_lazy_adam.add_prim_attr("indices_input_index", 10)
-     return sparse_lazy_adam
-
- _register_dist_optimizer("adam", create_distributed_adam)
- _register_dist_optimizer("fused_sparse_lazy_adam", create_distributed_fused_sparse_lazy_adam)
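The lazyadam.py hunks above mainly shrink the type signatures registered on the `_lazy_adam_opt` `MultitypeFuncGraph`: the push/pull and parameter-server arguments disappear, leaving only the plain dense and sparse branches. For readers unfamiliar with that mechanism, below is a minimal, hedged sketch of how a `MultitypeFuncGraph` dispatches on argument types and how `HyperMap` applies it across tuples; the `scale` graph and its branches are illustrative names, not part of MindSpore or of this diff.

```python
import mindspore as ms
from mindspore import Tensor, ops

# Illustrative dispatch graph playing the role that _lazy_adam_opt plays above.
scale = ops.MultitypeFuncGraph("scale")

@scale.register("Tensor", "Tensor")
def _scale_tensor(factor, grad):
    """Branch selected when both arguments are Tensors."""
    return grad * factor

@scale.register("Number", "Number")
def _scale_number(factor, grad):
    """Branch selected when both arguments are plain Python numbers."""
    return grad * factor

hyper_map = ops.HyperMap()
factors = (Tensor(0.5, ms.float32), Tensor(0.25, ms.float32))
grads = (Tensor([1.0, 2.0], ms.float32), Tensor([4.0, 8.0], ms.float32))
print(hyper_map(scale, factors, grads))   # Tensor/Tensor branch, applied pairwise over the tuples
print(scale(2, 3))                        # Number/Number branch, called directly
```

`LazyAdam.construct` uses the same pattern through `self.map_reverse(F.partial(_lazy_adam_opt, ...), ...)`, with the leading arguments pre-bound by `F.partial`.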
mindspore/nn/optim/momentum.py CHANGED
@@ -23,7 +23,6 @@ import mindspore.common.dtype as mstype
  from mindspore import _checkparam as Validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
- from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer


  _momentum_opt = C.MultitypeFuncGraph("momentum_opt")
@@ -35,18 +34,6 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment):
      success = F.depend(True, opt(weight, moment, learning_rate, gradient, momentum))
      return success

-
- @_momentum_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Function", "Bool")
- def _tensor_run_opt_ext_dist(opt, momentum, learning_rate, gradient, weight, moment,
-                              distributed_opt, use_flag):
-     """Apply momentum optimizer to the weight parameter using Tensor."""
-     if use_flag:
-         success = F.depend(True, distributed_opt(weight, moment, learning_rate, gradient, momentum))
-     else:
-         success = F.depend(True, opt(weight, moment, learning_rate, gradient, momentum))
-     return success
-
-
  class Momentum(Optimizer):
      r"""
      Implements the Momentum algorithm.
@@ -196,45 +183,20 @@ class Momentum(Optimizer):
          self.moments = self.params.clone(prefix="moments", init='zeros')
          self.opt = P.ApplyMomentum(use_nesterov=self.use_nesterov)

-         self.distributed_opts, self.use_distributed_opt_flags =\
-             self._get_distributed_optimizer_list("momentum", use_nesterov=self.use_nesterov)
-         self.use_dist_optimizer = self._use_distibuted_optimizer()

      @jit(backend="ms_backend")
      def construct(self, gradients):
          params = self.params
          moments = self.moments
-         gradients = self.flatten_gradients(gradients)
          gradients = self.decay_weight(gradients)
          gradients = self.gradients_centralization(gradients)
          gradients = self.scale_grad(gradients)
          lr = self.get_lr()
          self.assignadd(self.global_step, self.global_step_increase_tensor)
-         if self.use_dist_optimizer:
-             if self.is_group_lr:
-                 success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum),
-                                                  lr, gradients, params, moments,
-                                                  self.distributed_opts, self.use_distributed_opt_flags)
-             else:
-                 success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum, lr),
-                                                  gradients, params, moments,
-                                                  self.distributed_opts, self.use_distributed_opt_flags)
+         if self.is_group_lr:
+             success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum),
+                                              lr, gradients, params, moments)
          else:
-             if self.is_group_lr:
-                 success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum),
-                                                  lr, gradients, params, moments)
-             else:
-                 success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum, lr),
-                                                  gradients, params, moments)
+             success = self.hyper_map_reverse(F.partial(_momentum_opt, self.opt, self.momentum, lr),
+                                              gradients, params, moments)
          return success
-
-
- def _create_distributed_momentum(*args, **kwargs):
-     momentum = P.ApplyMomentum(*args, **kwargs)
-     momentum.add_prim_attr("gradient_type", "dense_gradient")
-     momentum.add_prim_attr("parameter_input_index", 0)
-     momentum.add_prim_attr("gradient_input_index", 3)
-     return momentum
-
-
- _register_dist_optimizer("momentum", _create_distributed_momentum)
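None of the momentum.py changes above touch the public `nn.Momentum` interface; only the internal distributed-registry branches and the `flatten_gradients` call are removed from `construct`. A minimal usage sketch under that assumption (the toy `nn.Dense` network and the random data are illustrative, not taken from the diff):

```python
import numpy as np
import mindspore as ms
from mindspore import nn, Tensor

net = nn.Dense(4, 2)                      # toy network standing in for a real model
loss_fn = nn.MSELoss()
opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)

train_step = nn.TrainOneStepCell(nn.WithLossCell(net, loss_fn), opt)
x = Tensor(np.random.randn(8, 4), ms.float32)
y = Tensor(np.random.randn(8, 2), ms.float32)
loss = train_step(x, y)                   # one update through Momentum.construct
print(loss)
```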
mindspore/nn/optim/optimizer.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright 2020-2022 Huawei Technologies Co., Ltd
+ # Copyright 2020-2021 Huawei Technologies Co., Ltd
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -32,11 +32,9 @@ import mindspore.common.dtype as mstype
  from mindspore import _checkparam as validator
  from mindspore import log as logger
  from mindspore.parallel._utils import _get_global_rank, _get_device_num, _get_parallel_mode
- from mindspore.parallel._ps_context import _is_ps_mode
  from mindspore.context import ParallelMode
  from mindspore import context
  from mindspore.nn.learning_rate_schedule import LearningRateSchedule
- from mindspore.nn.optim._dist_optimizer_registry import generate_dist_optimizer_list

  __all__ = ['Optimizer', 'opt_init_args_register']

@@ -161,7 +159,6 @@ class Optimizer(Cell):
  ...     def construct(self, gradients):
  ...         params = self.parameters
  ...         lr = self.get_lr()
- ...         gradients = self.flatten_gradients(gradients)
  ...         gradients = self.decay_weight(gradients)
  ...         gradients = self.gradients_centralization(gradients)
  ...         gradients = self.scale_grad(gradients)
@@ -218,15 +215,12 @@

          self._unique = True
          self._target = context.get_context("device_target")
-         self._use_flattened_params = False
-         self._grad_fusion_size = 0
          self.dynamic_lr = False
          self.assignadd = P.AssignAdd()
          self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')
          self.is_group = False
          self.is_group_lr = False
          self.is_group_params_ordered = False
-         self.use_parallel = False
          learning_rate = self._preprocess_single_lr(learning_rate)
          if isinstance(parameters[0], dict):
              self.is_group = True
@@ -263,11 +257,7 @@
              self.grad_centralization_flags = tuple(self.group_grad_centralization)
          else:
              self.parameters = ParameterTuple(parameters)
-             flat_params = self._get_flattened_params(parameters)
-             if self._use_flattened_params:
-                 self._parameters = ParameterTuple(flat_params)
-             else:
-                 self._parameters = self.parameters
+             self._parameters = self.parameters
          decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
          self.decay_flags = tuple(decay_filter(x) for x in self._parameters)
          self.dynamic_decay_flags = isinstance(weight_decay, Cell)
@@ -281,10 +271,6 @@
          # set user's parameters as local parameters
          for param in self._parameters:
              self._user_parameters.append(param.name)
-         ps_filter = lambda x: x.is_param_ps
-         self.ps_parameters = tuple(ps_filter(x) for x in self._parameters)
-         cache_filter = lambda x: x.cache_enable
-         self.cache_enable = tuple(cache_filter(x) for x in self._parameters)
          self.reciprocal_scale = Tensor(1.0 / self.loss_scale, mstype.float32)
          self.need_scale = self.loss_scale != 1.0
          self.global_step_increase_tensor = Tensor([1], mstype.int32)
@@ -296,28 +282,6 @@
          self._use_parallel_optimizer()
          self.enable_tuple_broaden = True

-     def _get_flattened_params(self, parameters):
-         """Get parameters for each contiguous memory chunks used by input parameters if they are flattened."""
-         if self.is_group:
-             # We don't use flattened parameters when parameters are grouped.
-             return parameters
-         # Check whether parameters are flattened.
-         flattened = Tensor._is_flattened(parameters) # pylint: disable=W0212
-         if not flattened:
-             # Parameters are not flattened.
-             return parameters
-         # Try to get chunk tensors from flattened parameters.
-         chunk_tensors = Tensor._get_flattened_tensors(parameters) # pylint: disable=W0212
-         if not chunk_tensors:
-             # Failed to get chunk tensors.
-             logger.warning("Parameters are not properly flattened, fallback to not flattened parameters.")
-             return parameters
-         # Convert chunk tensors to parameters.
-         self._use_flattened_params = True
-         self._grad_fusion_size = Tensor._get_fusion_size(chunk_tensors) # pylint: disable=W0212
-         return [Parameter._from_tensor(t, name='_chunk_param' + str(i) + '_' + str(t.dtype)) # pylint: disable=W0212
-                 for i, t in enumerate(chunk_tensors)]
-
      def _use_parallel_optimizer(self):
          """Indicates whether to use automatic parallelism."""
          if context.get_auto_parallel_context("enable_parallel_optimizer"):
@@ -331,7 +295,10 @@
                  raise RuntimeError("For 'Optimizer', parallel optimizer is not supported in {}, you should set "
                                     "parallel mode to 'data_parallel', 'semi_auto_parallel' or 'auto_parallel'."
                                     .format(_get_parallel_mode()))
-
+             else:
+                 self.use_parallel = False
+         else:
+             self.use_parallel = False
          if self.use_parallel:
              if not self._support_parallel_optimizer:
                  raise RuntimeError("For 'Optimizer', parallel optimizer only support optimizer 'Lamb' and "
@@ -403,13 +370,6 @@
              raise ValueError(f"For 'Optimizer', the argument {param_info} must not be empty.")
          return parameters

-     @staticmethod
-     def _use_distibuted_optimizer():
-         """
-         Whether use distributed optimizers.
-         """
-         return _is_ps_mode()
-
      def flatten_gradients(self, gradients):
          """
          Flatten gradients into several chunk tensors grouped by data type if network parameters are flattened.
@@ -424,9 +384,6 @@
          Returns:
              tuple[Tensor], The gradients after flattened, or the original gradients if parameters are not flattened.
          """
-         if self._use_flattened_params:
-             flatten_concat = inner.FlattenConcat(fusion_size=self._grad_fusion_size)
-             return flatten_concat(gradients)
          return gradients

      def decay_weight(self, gradients):
@@ -869,12 +826,6 @@
              F.assign(param_group[root][i], next_params[i])
          return new_param_group

-     def _get_distributed_optimizer_list(self, optimizer_type, *args, **kwargs):
-         """
-         Get the distributed optimizers list in distributed training mode.
-         """
-         return generate_dist_optimizer_list(optimizer_type, self._parameters, *args, **kwargs)
-
      def construct(self, *hyper_params):
          raise NotImplementedError

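The docstring hunk above (old line 161) also drops `flatten_gradients` from the recommended skeleton for a custom optimizer. A hedged sketch of that skeleton follows; the `MyMomentum` name and the per-parameter Python loop (used here instead of the docstring's `hyper_map` call, for brevity) are illustrative, not code from this release:

```python
import mindspore as ms
from mindspore import nn, ops, Parameter, Tensor

class MyMomentum(nn.Optimizer):
    """Toy optimizer following the updated Optimizer docstring skeleton."""
    def __init__(self, params, learning_rate, momentum=0.9):
        super().__init__(learning_rate, params)
        self.momentum = Parameter(Tensor(momentum, ms.float32), name="momentum")
        self.moments = self.parameters.clone(prefix="moments", init="zeros")
        self.opt = ops.ApplyMomentum()

    def construct(self, gradients):
        params = self.parameters
        lr = self.get_lr()
        gradients = self.decay_weight(gradients)             # flatten_gradients is no longer called here
        gradients = self.gradients_centralization(gradients)
        gradients = self.scale_grad(gradients)
        success = None
        for param, moment, grad in zip(params, self.moments, gradients):
            success = self.opt(param, moment, lr, grad, self.momentum)
        return success
```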
mindspore/nn/optim/proximal_ada_grad.py CHANGED
@@ -205,7 +205,6 @@ class ProximalAdagrad(Optimizer):
      def construct(self, grads):
          params = self._parameters
          accum = self.accum
-         grads = self.flatten_gradients(grads)
          grads = self.decay_weight(grads)
          grads = self.gradients_centralization(grads)
          grads = self.scale_grad(grads)
mindspore/nn/optim/rmsprop.py CHANGED
@@ -238,7 +238,6 @@ class RMSProp(Optimizer):
      @jit
      def construct(self, gradients):
          params = self._parameters
-         gradients = self.flatten_gradients(gradients)
          gradients = self.decay_weight(gradients)
          gradients = self.gradients_centralization(gradients)
          gradients = self.scale_grad(gradients)
mindspore/nn/optim/rprop.py CHANGED
@@ -203,7 +203,6 @@ class Rprop(Optimizer):

      @jit(backend="ms_backend")
      def construct(self, gradients):
-         gradients = self.flatten_gradients(gradients)
          gradients = self.decay_weight(gradients)
          gradients = self.gradients_centralization(gradients)
          gradients = self.scale_grad(gradients)
mindspore/nn/optim/sgd.py CHANGED
@@ -226,7 +226,6 @@ class SGD(Optimizer):
          accum = self.accum
          stat = self.stat
          gradients = self.decay_weight(gradients)
-         gradients = self.flatten_gradients(gradients)
          gradients = self.gradients_centralization(gradients)
          gradients = self.scale_grad(gradients)
          lr = self.get_lr()
mindspore/nn/optim/tft_wrapper.py CHANGED
@@ -108,7 +108,6 @@ class OptTFTWrapper(Optimizer):
          self.dynamic_decay_flags = opt.dynamic_decay_flags
          self.weight_decay = opt.weight_decay
          self.exec_weight_decay = opt.exec_weight_decay
-         self.ps_parameters = opt.ps_parameters
          self.cache_enable = opt.cache_enable
          self.reciprocal_scale = opt.reciprocal_scale
          self.need_scale = opt.need_scale
mindspore/nn/optim/thor.py CHANGED
@@ -585,7 +585,6 @@ class ThorGpu(Optimizer):
      def construct(self, gradients):
          params = self.params
          moments = self.moments
-         gradients = self.flatten_gradients(gradients)
          gradients = self.scale_grad(gradients)
          damping_step = self.gather(self.damping, self.cov_step, self.axis)
          damping_step = self.cast(damping_step, mstype.float32)
@@ -1247,7 +1246,6 @@ class ThorAscend(Optimizer):
      def construct(self, gradients):
          params = self.params
          moments = self.moments
-         gradients = self.flatten_gradients(gradients)
          gradients = self.scale_grad(gradients)
          damping_step = self.gather(self.damping, self.cov_step, self.axis)
          damping_step = self.cast(damping_step, mstype.float32)
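Every `construct` change from proximal_ada_grad.py down to thor.py above is the same one-line removal of `self.flatten_gradients(gradients)`. That matches the optimizer.py hunk shown earlier, where the method body is reduced to returning its input, so dropping the call does not change behaviour; a trivial plain-Python sketch of the resulting pass-through:

```python
def flatten_gradients(gradients):
    """Mirrors the trimmed Optimizer.flatten_gradients: gradients pass through unchanged."""
    return gradients

grads = (1.0, 2.0, 3.0)
assert flatten_gradients(grads) is grads
```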
mindspore/nn/probability/bijector/bijector.py CHANGED
@@ -15,8 +15,7 @@
  """Bijector"""
  from mindspore import context
  from mindspore.nn.cell import Cell
- from mindspore.ops import operations as P
- from mindspore.ops import functional as F
+ import mindspore.ops as ops
  from mindspore.ops.operations import _inner_ops as inner
  from mindspore.common import dtype as mstype
  from mindspore.common.tensor import Tensor
@@ -99,9 +98,9 @@ class Bijector(Cell):
          self.checktensor = CheckTensor()

          # ops needed for the base class
-         self.cast_base = P.Cast()
-         self.dtype_base = P.DType()
-         self.shape_base = P.Shape()
+         self.cast_base = ops.Cast()
+         self.dtype_base = ops.DType()
+         self.shape_base = ops.Shape()
          self.sametypeshape_base = inner.SameTypeShape()
          self.issubclass_base = inner.IsSubClass()

@@ -145,13 +144,13 @@
              if self.issubclass_base(value_type, mstype.float_):
                  return value
              return raise_type_error('input value of bijector', value_type, mstype.float_)
-         dtype_tensor = F.fill(self.dtype, self.shape_base(value), 0.0)
+         dtype_tensor = ops.fill(self.dtype, self.shape_base(value), 0.0)
          self.sametypeshape_base(value, dtype_tensor)
          return value

      def _shape_mapping(self, shape):
-         shape_tensor = F.fill(self.parameter_type, shape, 0.0)
-         dist_shape_tensor = F.fill(
+         shape_tensor = ops.fill(self.parameter_type, shape, 0.0)
+         dist_shape_tensor = ops.fill(
              self.parameter_type, self.batch_shape, 0.0)
          return (shape_tensor + dist_shape_tensor).shape

mindspore/nn/probability/bijector/gumbel_cdf.py CHANGED
@@ -13,7 +13,7 @@
  # limitations under the License.
  # ============================================================================
  """GumbelCDF Bijector"""
- from mindspore.ops import operations as P
+ import mindspore.ops as ops
  from ..distribution._utils.utils import check_greater_zero
  from ..distribution._utils.custom_ops import exp_generic, log_generic
  from .bijector import Bijector
@@ -86,7 +86,7 @@ class GumbelCDF(Bijector):
          self._scale = self._add_parameter(scale, 'scale')
          check_greater_zero(self._scale, "scale")

-         self.cast = P.Cast()
+         self.cast = ops.Cast()
          self.exp = exp_generic
          self.log = log_generic
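The bijector.py and gumbel_cdf.py hunks above replace the legacy `from mindspore.ops import operations as P` / `functional as F` aliases with the unified `mindspore.ops` namespace. A minimal sketch of the new spellings used in those files (assumed environment: MindSpore installed, PyNative mode; the sample tensor is illustrative):

```python
import numpy as np
import mindspore as ms
from mindspore import Tensor
import mindspore.ops as ops

x = Tensor(np.ones((2, 3)), ms.float32)

shape_op = ops.Shape()                            # replaces P.Shape()
cast_op = ops.Cast()                              # replaces P.Cast()
filled = ops.fill(ms.float32, shape_op(x), 0.0)   # replaces F.fill(...), as in Bijector._check_value

print(shape_op(x))                 # (2, 3)
print(cast_op(x, ms.int32).dtype)  # Int32
print(filled.shape)                # (2, 3)
```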