mindspore 2.7.0-cp311-cp311-win_amd64.whl → 2.7.1-cp311-cp311-win_amd64.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Potentially problematic release: this version of mindspore has been flagged as possibly problematic; see the registry's advisory page for details.
Files changed (290)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  6. mindspore/_extends/parse/compile_config.py +24 -1
  7. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -2
  8. mindspore/_extends/parse/resources.py +1 -1
  9. mindspore/_extends/parse/standard_method.py +8 -1
  10. mindspore/_extends/parse/trope.py +2 -1
  11. mindspore/_extends/pijit/pijit_func_white_list.py +7 -22
  12. mindspore/avcodec-59.dll +0 -0
  13. mindspore/avdevice-59.dll +0 -0
  14. mindspore/avfilter-8.dll +0 -0
  15. mindspore/avformat-59.dll +0 -0
  16. mindspore/avutil-57.dll +0 -0
  17. mindspore/boost/base.py +29 -2
  18. mindspore/common/_decorator.py +3 -2
  19. mindspore/common/_grad_function.py +3 -1
  20. mindspore/common/_tensor_cpp_method.py +1 -1
  21. mindspore/common/_tensor_docs.py +275 -64
  22. mindspore/common/_utils.py +0 -44
  23. mindspore/common/api.py +285 -35
  24. mindspore/common/dump.py +7 -108
  25. mindspore/common/dynamic_shape/auto_dynamic_shape.py +1 -3
  26. mindspore/common/hook_handle.py +60 -0
  27. mindspore/common/jit_config.py +5 -1
  28. mindspore/common/jit_trace.py +27 -12
  29. mindspore/common/lazy_inline.py +5 -3
  30. mindspore/common/parameter.py +13 -107
  31. mindspore/common/recompute.py +4 -11
  32. mindspore/common/tensor.py +16 -169
  33. mindspore/communication/_comm_helper.py +11 -1
  34. mindspore/communication/comm_func.py +138 -4
  35. mindspore/communication/management.py +85 -1
  36. mindspore/config/op_info.config +0 -15
  37. mindspore/context.py +5 -85
  38. mindspore/dataset/engine/datasets.py +8 -4
  39. mindspore/dataset/engine/datasets_vision.py +1 -1
  40. mindspore/dataset/engine/validators.py +1 -15
  41. mindspore/dnnl.dll +0 -0
  42. mindspore/{experimental/llm_boost/ascend_native → graph}/__init__.py +7 -7
  43. mindspore/graph/custom_pass.py +55 -0
  44. mindspore/include/dataset/execute.h +2 -2
  45. mindspore/jpeg62.dll +0 -0
  46. mindspore/mindrecord/__init__.py +3 -3
  47. mindspore/mindrecord/common/exceptions.py +1 -0
  48. mindspore/mindrecord/config.py +1 -1
  49. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  50. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  51. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  52. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  53. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  54. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  55. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  56. mindspore/mindrecord/filereader.py +4 -4
  57. mindspore/mindrecord/filewriter.py +5 -5
  58. mindspore/mindrecord/mindpage.py +2 -2
  59. mindspore/mindrecord/tools/cifar10.py +1 -1
  60. mindspore/mindrecord/tools/cifar100.py +1 -1
  61. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  62. mindspore/mindrecord/tools/cifar10_to_mr.py +1 -1
  63. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  64. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  65. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  66. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  67. mindspore/mindspore_backend_common.dll +0 -0
  68. mindspore/mindspore_backend_manager.dll +0 -0
  69. mindspore/mindspore_cluster.dll +0 -0
  70. mindspore/mindspore_common.dll +0 -0
  71. mindspore/mindspore_core.dll +0 -0
  72. mindspore/mindspore_cpu.dll +0 -0
  73. mindspore/mindspore_dump.dll +0 -0
  74. mindspore/mindspore_frontend.dll +0 -0
  75. mindspore/mindspore_glog.dll +0 -0
  76. mindspore/mindspore_hardware_abstract.dll +0 -0
  77. mindspore/mindspore_memory_pool.dll +0 -0
  78. mindspore/mindspore_ms_backend.dll +0 -0
  79. mindspore/mindspore_ops.dll +0 -0
  80. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  81. mindspore/mindspore_profiler.dll +0 -0
  82. mindspore/mindspore_pyboost.dll +0 -0
  83. mindspore/mindspore_pynative.dll +0 -0
  84. mindspore/mindspore_runtime_pipeline.dll +0 -0
  85. mindspore/mindspore_runtime_utils.dll +0 -0
  86. mindspore/mindspore_tools.dll +0 -0
  87. mindspore/mint/__init__.py +15 -10
  88. mindspore/mint/distributed/distributed.py +182 -62
  89. mindspore/mint/nn/__init__.py +2 -16
  90. mindspore/mint/nn/functional.py +4 -110
  91. mindspore/mint/nn/layer/__init__.py +0 -2
  92. mindspore/mint/nn/layer/activation.py +0 -6
  93. mindspore/mint/nn/layer/basic.py +0 -47
  94. mindspore/mint/nn/layer/conv.py +4 -4
  95. mindspore/mint/nn/layer/normalization.py +8 -13
  96. mindspore/mint/nn/layer/pooling.py +0 -4
  97. mindspore/nn/__init__.py +1 -3
  98. mindspore/nn/cell.py +16 -66
  99. mindspore/nn/layer/basic.py +49 -1
  100. mindspore/nn/layer/container.py +16 -0
  101. mindspore/nn/layer/embedding.py +4 -169
  102. mindspore/nn/layer/normalization.py +2 -1
  103. mindspore/nn/layer/thor_layer.py +4 -85
  104. mindspore/nn/optim/ada_grad.py +0 -1
  105. mindspore/nn/optim/adafactor.py +0 -1
  106. mindspore/nn/optim/adam.py +31 -124
  107. mindspore/nn/optim/adamax.py +0 -1
  108. mindspore/nn/optim/asgd.py +0 -1
  109. mindspore/nn/optim/ftrl.py +8 -102
  110. mindspore/nn/optim/lamb.py +0 -1
  111. mindspore/nn/optim/lars.py +0 -3
  112. mindspore/nn/optim/lazyadam.py +25 -218
  113. mindspore/nn/optim/momentum.py +5 -43
  114. mindspore/nn/optim/optimizer.py +6 -55
  115. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  116. mindspore/nn/optim/rmsprop.py +0 -1
  117. mindspore/nn/optim/rprop.py +0 -1
  118. mindspore/nn/optim/sgd.py +0 -1
  119. mindspore/nn/optim/tft_wrapper.py +0 -1
  120. mindspore/nn/optim/thor.py +0 -2
  121. mindspore/nn/probability/bijector/bijector.py +7 -8
  122. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  123. mindspore/nn/probability/bijector/power_transform.py +20 -21
  124. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  125. mindspore/nn/probability/bijector/softplus.py +13 -14
  126. mindspore/nn/wrap/grad_reducer.py +4 -74
  127. mindspore/numpy/array_creations.py +2 -2
  128. mindspore/numpy/fft.py +9 -9
  129. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  130. mindspore/onnx/onnx_export.py +137 -0
  131. mindspore/opencv_core4110.dll +0 -0
  132. mindspore/opencv_imgcodecs4110.dll +0 -0
  133. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  134. mindspore/ops/__init__.py +2 -0
  135. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  136. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  137. mindspore/ops/_op_impl/cpu/__init__.py +0 -5
  138. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +16 -22
  139. mindspore/ops/auto_generate/gen_extend_func.py +2 -7
  140. mindspore/ops/auto_generate/gen_ops_def.py +98 -141
  141. mindspore/ops/auto_generate/gen_ops_prim.py +12708 -12686
  142. mindspore/ops/communication.py +97 -0
  143. mindspore/ops/composite/__init__.py +5 -2
  144. mindspore/ops/composite/base.py +15 -1
  145. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  146. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  147. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  148. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  149. mindspore/ops/function/__init__.py +1 -0
  150. mindspore/ops/function/array_func.py +14 -12
  151. mindspore/ops/function/comm_func.py +3883 -0
  152. mindspore/ops/function/debug_func.py +3 -4
  153. mindspore/ops/function/math_func.py +45 -54
  154. mindspore/ops/function/nn_func.py +75 -294
  155. mindspore/ops/function/random_func.py +9 -18
  156. mindspore/ops/functional.py +2 -0
  157. mindspore/ops/functional_overload.py +354 -18
  158. mindspore/ops/operations/__init__.py +2 -5
  159. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  160. mindspore/ops/operations/_inner_ops.py +1 -38
  161. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  162. mindspore/ops/operations/array_ops.py +1 -0
  163. mindspore/ops/operations/comm_ops.py +94 -2
  164. mindspore/ops/operations/custom_ops.py +228 -19
  165. mindspore/ops/operations/debug_ops.py +27 -29
  166. mindspore/ops/operations/manually_defined/ops_def.py +27 -306
  167. mindspore/ops/operations/nn_ops.py +2 -2
  168. mindspore/ops/operations/sparse_ops.py +0 -83
  169. mindspore/ops/primitive.py +1 -17
  170. mindspore/ops/tensor_method.py +72 -3
  171. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  172. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  173. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  174. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  175. mindspore/ops_generate/common/gen_constants.py +11 -10
  176. mindspore/ops_generate/common/op_proto.py +18 -1
  177. mindspore/ops_generate/common/template.py +102 -245
  178. mindspore/ops_generate/common/template_utils.py +212 -0
  179. mindspore/ops_generate/gen_custom_ops.py +69 -0
  180. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  181. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  182. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  183. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  184. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  185. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  186. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  187. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  188. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  189. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  190. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  191. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  192. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  193. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  194. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  195. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  196. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  197. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  198. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  199. mindspore/parallel/_cell_wrapper.py +1 -1
  200. mindspore/parallel/_parallel_serialization.py +1 -4
  201. mindspore/parallel/_utils.py +29 -6
  202. mindspore/parallel/checkpoint_transform.py +18 -2
  203. mindspore/parallel/cluster/process_entity/_api.py +24 -32
  204. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  205. mindspore/{experimental/llm_boost/atb → parallel/distributed}/__init__.py +21 -23
  206. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  207. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  208. mindspore/parallel/strategy.py +336 -0
  209. mindspore/parallel/transform_safetensors.py +117 -16
  210. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +3 -0
  211. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  212. mindspore/profiler/common/constant.py +5 -0
  213. mindspore/profiler/common/file_manager.py +9 -0
  214. mindspore/profiler/common/msprof_cmd_tool.py +38 -2
  215. mindspore/profiler/common/path_manager.py +56 -24
  216. mindspore/profiler/common/profiler_context.py +2 -12
  217. mindspore/profiler/common/profiler_info.py +3 -3
  218. mindspore/profiler/common/profiler_path_manager.py +13 -0
  219. mindspore/profiler/common/util.py +30 -3
  220. mindspore/profiler/experimental_config.py +2 -1
  221. mindspore/profiler/platform/npu_profiler.py +33 -6
  222. mindspore/run_check/_check_version.py +108 -24
  223. mindspore/runtime/__init__.py +3 -2
  224. mindspore/runtime/executor.py +11 -3
  225. mindspore/runtime/memory.py +112 -0
  226. mindspore/swresample-4.dll +0 -0
  227. mindspore/swscale-6.dll +0 -0
  228. mindspore/tinyxml2.dll +0 -0
  229. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  230. mindspore/tools/data_dump.py +130 -0
  231. mindspore/tools/sdc_detect.py +91 -0
  232. mindspore/tools/stress_detect.py +63 -0
  233. mindspore/train/__init__.py +6 -6
  234. mindspore/train/_utils.py +5 -18
  235. mindspore/train/amp.py +6 -4
  236. mindspore/train/callback/_checkpoint.py +0 -9
  237. mindspore/train/callback/_train_fault_tolerance.py +69 -18
  238. mindspore/train/data_sink.py +1 -5
  239. mindspore/train/model.py +38 -211
  240. mindspore/train/serialization.py +126 -387
  241. mindspore/turbojpeg.dll +0 -0
  242. mindspore/utils/__init__.py +6 -3
  243. mindspore/utils/dlpack.py +92 -0
  244. mindspore/utils/dryrun.py +1 -1
  245. mindspore/utils/runtime_execution_order_check.py +10 -0
  246. mindspore/utils/sdc_detect.py +14 -12
  247. mindspore/utils/stress_detect.py +43 -0
  248. mindspore/utils/utils.py +144 -8
  249. mindspore/version.py +1 -1
  250. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  251. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/RECORD +254 -267
  252. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -210
  253. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  254. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  255. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  256. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  257. mindspore/experimental/llm_boost/register.py +0 -130
  258. mindspore/experimental/llm_boost/utils.py +0 -31
  259. mindspore/include/OWNERS +0 -7
  260. mindspore/mindspore_cpu_res_manager.dll +0 -0
  261. mindspore/mindspore_ops_kernel_common.dll +0 -0
  262. mindspore/mindspore_res_manager.dll +0 -0
  263. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  264. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  265. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  266. mindspore/nn/reinforcement/tensor_array.py +0 -145
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  270. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  271. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  272. mindspore/ops/_op_impl/cpu/buffer_append.py +0 -28
  273. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  274. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  275. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  276. mindspore/ops/operations/_tensor_array.py +0 -359
  277. mindspore/ops/operations/rl_ops.py +0 -288
  278. mindspore/parallel/_offload_context.py +0 -275
  279. mindspore/parallel/_recovery_context.py +0 -115
  280. mindspore/parallel/_transformer/__init__.py +0 -35
  281. mindspore/parallel/_transformer/layers.py +0 -765
  282. mindspore/parallel/_transformer/loss.py +0 -251
  283. mindspore/parallel/_transformer/moe.py +0 -693
  284. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  285. mindspore/parallel/_transformer/transformer.py +0 -3124
  286. mindspore/parallel/mpi/_mpi_config.py +0 -116
  287. mindspore/train/memory_profiling_pb2.py +0 -298
  288. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  289. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  290. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
- # Copyright 2020-2022 Huawei Technologies Co., Ltd
+ # Copyright 2020-2021 Huawei Technologies Co., Ltd
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.

@@ -28,7 +28,6 @@ from mindspore.common.tensor import Tensor
  from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
- from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
  from mindspore.common._decorator import deprecated

  _adam_opt = C.MultitypeFuncGraph("adam_opt")

@@ -727,7 +726,6 @@ class Adam(Optimizer):
  self.opt = P.Adam(use_locking, use_nesterov)
  self.sparse_opt = P.FusedSparseLazyAdam(use_locking, use_nesterov)
  self.sparse_opt.set_device("CPU")
- self._init_distributed_opts(use_locking, use_nesterov)

  else:
  self._is_device = True

@@ -737,7 +735,6 @@ class Adam(Optimizer):
  self.opt = P.Adam(use_locking, use_nesterov)
  self.sparse_opt = P.FusedSparseAdam(use_locking, use_nesterov)
  self.sparse_opt.set_device("CPU")
- self._init_distributed_opts(use_locking, use_nesterov)

  def _apply_adam(self, params, beta1_power, beta2_power, moment1, moment2, lr, gradients):
  """Execute Adam optimizer and its variants."""

@@ -750,83 +747,44 @@ class Adam(Optimizer):
  self.beta2, self.eps, lr), gradients, params, moment1, moment2)
  # Lazy adam or normal adam
  else:
- if self.use_dist_optimizer:
- if self.use_dist_optimizer and self.use_amsgrad:
- raise ValueError(f"Adam with amsgrad is currently not supporting distributed training!"
- f"Please set use_amsgrad=False for distributed training.")
- if self.is_group_lr:
- if self.use_lazy:
- success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps),
- lr, gradients, self._parameters, self.moment1, self.moment2,
- self.dense_lazyadam_opts,
- self.use_dense_opt_flags, self.sparse_lazyadam_opts,
- self.use_sparse_opt_flags)
- # Normal Adam
- else:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self.use_locking,
- self.use_nesterov, self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps),
- lr, gradients, params, moment1, moment2,
- self.dense_adam_opts, self.use_dense_opt_flags,
- self.sparse_adam_opts, self.use_sparse_opt_flags)
+ if self.is_group_lr:
+ if self.use_lazy:
+ success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
+ self.use_locking, self.use_nesterov,
+ self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
+ self.eps), lr, gradients, params, moment1, moment2)
  else:
- if self.use_lazy:
- success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power, self.beta1,
- self.beta2, self.eps, lr), gradients, self._parameters,
- self.moment1, self.moment2,
- self.dense_lazyadam_opts, self.use_dense_opt_flags,
- self.sparse_lazyadam_opts, self.use_sparse_opt_flags)
+ if self.use_amsgrad:
+ success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
+ self.use_locking, self.use_nesterov,
+ self._is_device, beta1_power, beta2_power,
+ self.beta1, self.beta2, self.eps), lr, gradients, params,
+ moment1, moment2, self.vhat)
  else:
  success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
  self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
- self.eps, lr), gradients, params, moment1, moment2,
- self.dense_adam_opts,
- self.use_dense_opt_flags, self.sparse_adam_opts, self.use_sparse_opt_flags)
+ self._is_device, beta1_power, beta2_power,
+ self.beta1, self.beta2, self.eps), lr, gradients, params,
+ moment1, moment2)
  else:
- if self.is_group_lr:
- if self.use_lazy:
- success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
- self.eps), lr, gradients, params, moment1, moment2)
- else:
- if self.use_amsgrad:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps), lr, gradients, params,
- moment1, moment2, self.vhat)
- else:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps), lr, gradients, params,
- moment1, moment2)
+ if self.use_lazy:
+ success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
+ self.use_locking, self.use_nesterov,
+ self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
+ self.eps, lr), gradients, params, moment1, moment2)
  else:
- if self.use_lazy:
- success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
+ if self.use_amsgrad:
+ success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
  self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
- self.eps, lr), gradients, params, moment1, moment2)
+ self._is_device, beta1_power, beta2_power,
+ self.beta1, self.beta2, self.eps, lr), gradients, params,
+ moment1, moment2, self.vhat)
  else:
- if self.use_amsgrad:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps, lr), gradients, params,
- moment1, moment2, self.vhat)
- else:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps, lr), gradients, params,
- moment1, moment2)
+ success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
+ self.use_locking, self.use_nesterov,
+ self._is_device, beta1_power, beta2_power,
+ self.beta1, self.beta2, self.eps, lr), gradients, params,
+ moment1, moment2)

  return success

@@ -835,7 +793,6 @@ class Adam(Optimizer):
  params = self._parameters
  moment1 = self.moment1
  moment2 = self.moment2
- gradients = self.flatten_gradients(gradients)
  gradients = self.decay_weight(gradients)
  if not self.use_offload:
  gradients = self.gradients_centralization(gradients)

@@ -859,13 +816,6 @@ class Adam(Optimizer):
  """
  self._set_base_target(value)

- def _init_distributed_opts(self, use_locking, use_nesterov):
- self.use_dist_optimizer = self._use_distibuted_optimizer()
- self.dense_adam_opts, self.use_dense_opt_flags = \
- self._get_distributed_optimizer_list("adam", use_locking, use_nesterov)
- self.sparse_adam_opts, self.use_sparse_opt_flags = \
- self._get_distributed_optimizer_list("fused_sparse_adam", use_locking, use_nesterov)
-

  class AdamWeightDecay(Optimizer):
  r"""

@@ -1028,11 +978,9 @@ class AdamWeightDecay(Optimizer):

  @jit(backend="ms_backend")
  def construct(self, gradients):
- gradients = self.flatten_gradients(gradients)
  weight_decay = self.get_weight_decay()
  lr = self.get_lr()
  self.assignadd(self.global_step, self.global_step_increase_tensor)
-
  if self.use_fused_opt:
  if self.is_group:
  if self.is_group_lr:

@@ -1070,19 +1018,6 @@ class AdamWeightDecay(Optimizer):

  return optim_result

- @Optimizer.target.setter
- def target(self, value):
- """
- If the input value is set to "CPU", the parameters will be updated on the host using the Fused
- optimizer operation.
- """
- self._set_base_target(value)
- if value == 'CPU':
- self.fused_opt.set_device("CPU")
- self.use_fused_opt = True
- else:
- self.use_fused_opt = False
-

  class AdamOffload(Optimizer):
  r"""

@@ -1251,7 +1186,6 @@ class AdamOffload(Optimizer):
  params = self._parameters
  moment1 = self.moment1
  moment2 = self.moment2
- gradients = self.flatten_gradients(gradients)
  gradients = self.decay_weight(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()

@@ -1270,30 +1204,3 @@ class AdamOffload(Optimizer):
  beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr),
  gradients, params, moment1, moment2)
  return success
-
-
- def create_distributed_adam(*args, **kwargs):
- """
- Create the distributed Adam op.
- """
- adam = P.Adam(*args, **kwargs)
- adam.add_prim_attr("gradient_type", "dense_gradient")
- adam.add_prim_attr("parameter_input_index", 0)
- adam.add_prim_attr("gradient_input_index", 9)
- return adam
-
-
- def create_distributed_fused_sparse_adam(*args, **kwargs):
- """
- Create the distributed FusedSparseAdam op.
- """
- sparse_adam = P.FusedSparseAdam(*args, **kwargs)
- sparse_adam.add_prim_attr("gradient_type", "sparse_gradient")
- sparse_adam.add_prim_attr("parameter_input_index", 0)
- sparse_adam.add_prim_attr("gradient_input_index", 9)
- sparse_adam.add_prim_attr("indices_input_index", 10)
- return sparse_adam
-
-
- _register_dist_optimizer("adam", create_distributed_adam)
- _register_dist_optimizer("fused_sparse_adam", create_distributed_fused_sparse_adam)
@@ -202,7 +202,6 @@ class AdaMax(Optimizer):

  @jit
  def construct(self, gradients):
- gradients = self.flatten_gradients(gradients)
  gradients = self.decay_weight(gradients)
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)

@@ -184,7 +184,6 @@ class ASGD(Optimizer):

  @jit(backend="ms_backend")
  def construct(self, gradients):
- gradients = self.flatten_gradients(gradients)
  gradients = self.decay_weight(gradients)
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
@@ -21,27 +21,10 @@ from mindspore.common.api import jit
  from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
- from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer

  _ftrl_opt = C.MultitypeFuncGraph("ftrl_opt")


- @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
- "RowTensor", "Tensor", "Tensor", "Bool", "Function", "Bool", "Function", "Bool")
- def _tensor_run_opt_with_sparse_dist(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable,
- distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
- """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
- success = True
- indices = gradient.indices
- values = gradient.values
- if use_sparse_flag:
- success = F.depend(success, distributed_sparse_opt(weight, moment, linear, values, indices))
- else:
- success = F.depend(success, spars_opt(weight, moment, linear, values, indices))
- return success
-
-
  def _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values):
  """Apllpy ftrl optimizer for map parameter"""
  success = True

@@ -78,43 +61,10 @@ def _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, mome
  return success


- @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "MapTensor",
- "MapTensor", "MapTensor", "MapTensor", "Bool", "Function", "Bool", "Function", "Bool")
- def _run_map_tensor_opt_with_sparse_dist(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable,
- distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
- """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
- success = True
- indices, values = gradient.get_data()
- if use_sparse_flag:
- # PS Mode.
- success = F.depend(success, distributed_sparse_opt(weight, moment, linear, values, indices))
- elif cache_enable:
- # PS Cache mode.
- _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values)
- else:
- raise Exception("Unexpected mode for distributed optimizer.")
- return success
-
-
- @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
- "Tensor", "Tensor", "Tensor", "Bool", "Function", "Bool", "Function", "Bool")
- def _tensor_run_opt_dist(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable,
- distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
- """Apply ftrl optimizer to the weight parameter."""
- success = True
- if use_flag:
- success = F.depend(success, distributed_opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
- else:
- success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
- return success
-
-
  @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
- "RowTensor", "Tensor", "Tensor", "Bool")
+ "RowTensor", "Tensor", "Tensor")
  def _tensor_run_opt_with_sparse(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable):
+ gradient, weight, moment):
  """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
  success = True
  indices = gradient.indices

@@ -124,9 +74,9 @@ def _tensor_run_opt_with_sparse(opt, spars_opt, l1, l2, lr_power, learning_rate,


  @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "MapTensor",
- "MapTensor", "MapTensor", "MapTensor", "Bool")
+ "MapTensor", "MapTensor", "MapTensor")
  def _run_map_tensor_opt_with_sparse(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable):
+ gradient, weight, moment):
  """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
  success = True
  indices, values = gradient.get_data()

@@ -135,9 +85,9 @@ def _run_map_tensor_opt_with_sparse(opt, spars_opt, l1, l2, lr_power, learning_r


  @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
- "Tensor", "Tensor", "Tensor", "Bool")
+ "Tensor", "Tensor", "Tensor")
  def _tensor_run_opt(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable):
+ gradient, weight, moment):
  """Apply ftrl optimizer to the weight parameter."""
  success = True
  success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
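Each @_ftrl_opt.register(...) overload above loses its trailing "Bool" tag together with the cache_enable parameter: a MultitypeFuncGraph dispatches by matching one type tag per runtime argument, so the tag list and the Python signature must change in lockstep. A self-contained sketch of that registration pattern (illustrative only, not code from ftrl.py) follows.

    from mindspore.ops import composite as C

    _demo_opt = C.MultitypeFuncGraph("demo_opt")

    # One type-tag string per argument; the overload is selected by matching these
    # tags, so removing an argument (such as cache_enable) also removes its tag.
    @_demo_opt.register("Function", "Tensor", "Tensor")
    def _demo_dense(opt, gradient, weight):
        """Dense branch: apply the primitive directly."""
        return opt(weight, gradient)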
@@ -320,14 +270,11 @@ class FTRL(Optimizer):
  self.use_locking = use_locking
  self.sparse_opt = P.SparseApplyFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)

- self._init_distributed_opts(use_locking, learning_rate, l1, l2, lr_power)
-
  @jit
  def construct(self, grads):
  params = self._parameters
  moments = self.moments
  linear = self.linear
- grads = self.flatten_gradients(grads)
  grads = self.decay_weight(grads)
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)

@@ -335,14 +282,8 @@ class FTRL(Optimizer):
  lr = self.get_lr()
  self.assignadd(self.global_step, self.global_step_increase_tensor)

- if self.use_dist_optimizer:
- success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self.l1, self.l2, self.lr_power, lr),
- linear, grads, params, moments, self.cache_enable,
- self.distributed_opts, self.use_distributed_opt_flags,
- self.distributed_sparse_opts, self.use_distributed_sparse_opt_flags)
- else:
- success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self.l1, self.l2, self.lr_power, lr),
- linear, grads, params, moments, self.cache_enable)
+ success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self.l1, self.l2, self.lr_power, lr),
+ linear, grads, params, moments)
  return success

  @Optimizer.target.setter

@@ -366,38 +307,3 @@ class FTRL(Optimizer):
  self.sparse_opt = P.SparseApplyFtrl(self.lr, self.l1, self.l2, self.lr_power, self.use_locking)

  self._target = value
-
- def _init_distributed_opts(self, use_locking, learning_rate, l1, l2, lr_power):
- self.use_dist_optimizer = self._use_distibuted_optimizer()
- self.distributed_opts, self.use_distributed_opt_flags =\
- self._get_distributed_optimizer_list("ftrl", use_locking=use_locking)
- self.distributed_sparse_opts, self.use_distributed_sparse_opt_flags =\
- self._get_distributed_optimizer_list("fused_sparse_ftrl", learning_rate,
- l1, l2, lr_power, use_locking=use_locking)
-
-
- def create_distributed_ftrl(*args, **kwargs):
- """
- Create the distributed ApplyFtrl op.
- """
- ftrl = P.ApplyFtrl(*args, **kwargs)
- ftrl.add_prim_attr("gradient_type", "dense_gradient")
- ftrl.add_prim_attr("parameter_input_index", 0)
- ftrl.add_prim_attr("gradient_input_index", 3)
- return ftrl
-
-
- def create_distributed_fused_sparse_ftrl(*args, **kwargs):
- """
- Create the distributed FusedSparseFtrl op.
- """
- sparse_ftrl = P.FusedSparseFtrl(*args, **kwargs)
- sparse_ftrl.add_prim_attr("gradient_type", "sparse_gradient")
- sparse_ftrl.add_prim_attr("parameter_input_index", 0)
- sparse_ftrl.add_prim_attr("gradient_input_index", 3)
- sparse_ftrl.add_prim_attr("indices_input_index", 4)
- return sparse_ftrl
-
-
- _register_dist_optimizer("ftrl", create_distributed_ftrl)
- _register_dist_optimizer("fused_sparse_ftrl", create_distributed_fused_sparse_ftrl)
@@ -269,7 +269,6 @@ class Lamb(Optimizer):
  lr = self.get_lr()
  self.assignadd(self.global_step, self.global_step_increase_tensor)
  lamb_opt = _lamb_opt
- gradients = self.flatten_gradients(gradients)
  gradients = self.gradients_centralization(gradients)
  if self.is_group:
  if self.is_group_lr:
@@ -125,8 +125,6 @@ class LARS(Optimizer):
  self.weight_decay = optimizer.weight_decay
  self.global_step = optimizer.global_step
  self.parameters = optimizer.parameters
- if optimizer._use_flattened_params: # pylint: disable=W0212
- self.opt._use_flattened_params = False # pylint: disable=W0212
  self._user_parameters += [param.name for param in self.parameters]
  self.use_clip = use_clip
  self.lars_flag = tuple(lars_filter(x) for x in self.parameters)

@@ -173,7 +171,6 @@ class LARS(Optimizer):
  @jit
  def construct(self, gradients):
  params = self.parameters
- gradients = self.flatten_gradients(gradients)
  if self.use_clip:
  lr = self._get_lr()
  else:
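The LARS hunks drop the _use_flattened_params bookkeeping and the flatten_gradients call, but the public wrapper API is unchanged. A minimal sketch of wrapping a Momentum optimizer with LARS, as it would still be written against 2.7.1, is shown below; the filter lambda is the usual skip-LayerNorm-and-bias convention, not something introduced by this diff.

    from mindspore import nn

    net = nn.Dense(4, 2)
    base_opt = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)

    # LARS wraps an existing optimizer and rescales the update layer by layer.
    opt = nn.LARS(base_opt,
                  epsilon=1e-5,
                  coefficient=0.001,
                  lars_filter=lambda p: 'LayerNorm' not in p.name and 'bias' not in p.name)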