mindspore-2.7.0rc1-cp311-cp311-win_amd64.whl → mindspore-2.7.1-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (370)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +5 -2
  3. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +2 -2
  7. mindspore/_extends/builtin_operations.py +3 -3
  8. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/__init__.py +3 -3
  11. mindspore/_extends/parse/compile_config.py +24 -1
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
  13. mindspore/_extends/parse/parser.py +28 -22
  14. mindspore/_extends/parse/resources.py +1 -1
  15. mindspore/_extends/parse/standard_method.py +23 -2
  16. mindspore/_extends/parse/trope.py +2 -1
  17. mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
  18. mindspore/amp.py +0 -18
  19. mindspore/avcodec-59.dll +0 -0
  20. mindspore/avdevice-59.dll +0 -0
  21. mindspore/avfilter-8.dll +0 -0
  22. mindspore/avformat-59.dll +0 -0
  23. mindspore/avutil-57.dll +0 -0
  24. mindspore/boost/base.py +29 -2
  25. mindspore/common/__init__.py +18 -12
  26. mindspore/common/_decorator.py +3 -2
  27. mindspore/common/_grad_function.py +3 -1
  28. mindspore/common/_tensor_cpp_method.py +1 -1
  29. mindspore/common/_tensor_docs.py +371 -96
  30. mindspore/common/_utils.py +7 -43
  31. mindspore/common/api.py +434 -135
  32. mindspore/common/dtype.py +98 -57
  33. mindspore/common/dump.py +7 -108
  34. mindspore/common/dynamic_shape/__init__.py +0 -0
  35. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
  36. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  37. mindspore/common/file_system.py +59 -9
  38. mindspore/common/hook_handle.py +82 -3
  39. mindspore/common/jit_config.py +5 -1
  40. mindspore/common/jit_trace.py +27 -12
  41. mindspore/common/lazy_inline.py +5 -3
  42. mindspore/common/np_dtype.py +3 -3
  43. mindspore/common/parameter.py +17 -127
  44. mindspore/common/recompute.py +4 -13
  45. mindspore/common/tensor.py +50 -217
  46. mindspore/communication/_comm_helper.py +11 -1
  47. mindspore/communication/comm_func.py +138 -4
  48. mindspore/communication/management.py +85 -1
  49. mindspore/config/op_info.config +0 -15
  50. mindspore/context.py +20 -106
  51. mindspore/dataset/__init__.py +1 -1
  52. mindspore/dataset/audio/transforms.py +1 -1
  53. mindspore/dataset/core/config.py +35 -1
  54. mindspore/dataset/engine/datasets.py +338 -319
  55. mindspore/dataset/engine/datasets_user_defined.py +38 -22
  56. mindspore/dataset/engine/datasets_vision.py +1 -1
  57. mindspore/dataset/engine/validators.py +1 -15
  58. mindspore/dataset/transforms/c_transforms.py +2 -2
  59. mindspore/dataset/transforms/transforms.py +3 -3
  60. mindspore/dataset/vision/__init__.py +1 -1
  61. mindspore/dataset/vision/py_transforms.py +8 -8
  62. mindspore/dataset/vision/transforms.py +17 -5
  63. mindspore/dataset/vision/utils.py +632 -21
  64. mindspore/device_context/ascend/op_tuning.py +35 -1
  65. mindspore/dnnl.dll +0 -0
  66. mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
  67. mindspore/graph/custom_pass.py +55 -0
  68. mindspore/include/api/cell.h +28 -4
  69. mindspore/include/api/cfg.h +24 -7
  70. mindspore/include/api/context.h +1 -0
  71. mindspore/include/api/delegate.h +0 -2
  72. mindspore/include/api/dual_abi_helper.h +100 -19
  73. mindspore/include/api/graph.h +14 -1
  74. mindspore/include/api/kernel.h +16 -3
  75. mindspore/include/api/kernel_api.h +9 -1
  76. mindspore/include/api/metrics/accuracy.h +9 -0
  77. mindspore/include/api/model.h +5 -1
  78. mindspore/include/api/model_group.h +4 -0
  79. mindspore/include/api/model_parallel_runner.h +2 -0
  80. mindspore/include/api/status.h +48 -10
  81. mindspore/include/api/types.h +6 -1
  82. mindspore/include/dataset/constants.h +9 -0
  83. mindspore/include/dataset/execute.h +2 -2
  84. mindspore/jpeg62.dll +0 -0
  85. mindspore/mindrecord/__init__.py +3 -3
  86. mindspore/mindrecord/common/exceptions.py +1 -0
  87. mindspore/mindrecord/config.py +1 -1
  88. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  89. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  90. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  91. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  92. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  93. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  94. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  95. mindspore/mindrecord/filereader.py +4 -4
  96. mindspore/mindrecord/filewriter.py +5 -5
  97. mindspore/mindrecord/mindpage.py +2 -2
  98. mindspore/mindrecord/tools/cifar10.py +4 -3
  99. mindspore/mindrecord/tools/cifar100.py +1 -1
  100. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  101. mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
  102. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  103. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  104. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  105. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  106. mindspore/mindspore_backend_common.dll +0 -0
  107. mindspore/mindspore_backend_manager.dll +0 -0
  108. mindspore/mindspore_cluster.dll +0 -0
  109. mindspore/mindspore_common.dll +0 -0
  110. mindspore/mindspore_core.dll +0 -0
  111. mindspore/mindspore_cpu.dll +0 -0
  112. mindspore/mindspore_dump.dll +0 -0
  113. mindspore/mindspore_frontend.dll +0 -0
  114. mindspore/mindspore_glog.dll +0 -0
  115. mindspore/mindspore_hardware_abstract.dll +0 -0
  116. mindspore/mindspore_memory_pool.dll +0 -0
  117. mindspore/mindspore_ms_backend.dll +0 -0
  118. mindspore/mindspore_ops.dll +0 -0
  119. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  120. mindspore/mindspore_profiler.dll +0 -0
  121. mindspore/mindspore_pyboost.dll +0 -0
  122. mindspore/mindspore_pynative.dll +0 -0
  123. mindspore/mindspore_runtime_pipeline.dll +0 -0
  124. mindspore/mindspore_runtime_utils.dll +0 -0
  125. mindspore/mindspore_tools.dll +0 -0
  126. mindspore/mint/__init__.py +15 -10
  127. mindspore/mint/distributed/__init__.py +4 -0
  128. mindspore/mint/distributed/distributed.py +392 -69
  129. mindspore/mint/nn/__init__.py +2 -16
  130. mindspore/mint/nn/functional.py +4 -110
  131. mindspore/mint/nn/layer/__init__.py +0 -2
  132. mindspore/mint/nn/layer/_functions.py +1 -2
  133. mindspore/mint/nn/layer/activation.py +0 -6
  134. mindspore/mint/nn/layer/basic.py +0 -47
  135. mindspore/mint/nn/layer/conv.py +10 -10
  136. mindspore/mint/nn/layer/normalization.py +11 -16
  137. mindspore/mint/nn/layer/pooling.py +0 -4
  138. mindspore/nn/__init__.py +1 -3
  139. mindspore/nn/cell.py +231 -239
  140. mindspore/nn/layer/activation.py +4 -2
  141. mindspore/nn/layer/basic.py +56 -14
  142. mindspore/nn/layer/container.py +16 -0
  143. mindspore/nn/layer/embedding.py +4 -169
  144. mindspore/nn/layer/image.py +1 -1
  145. mindspore/nn/layer/normalization.py +2 -1
  146. mindspore/nn/layer/thor_layer.py +4 -85
  147. mindspore/nn/optim/ada_grad.py +0 -1
  148. mindspore/nn/optim/adafactor.py +0 -1
  149. mindspore/nn/optim/adam.py +32 -127
  150. mindspore/nn/optim/adamax.py +0 -1
  151. mindspore/nn/optim/asgd.py +0 -1
  152. mindspore/nn/optim/ftrl.py +8 -102
  153. mindspore/nn/optim/lamb.py +1 -4
  154. mindspore/nn/optim/lars.py +0 -3
  155. mindspore/nn/optim/lazyadam.py +25 -218
  156. mindspore/nn/optim/momentum.py +5 -43
  157. mindspore/nn/optim/optimizer.py +6 -55
  158. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  159. mindspore/nn/optim/rmsprop.py +0 -1
  160. mindspore/nn/optim/rprop.py +0 -1
  161. mindspore/nn/optim/sgd.py +0 -1
  162. mindspore/nn/optim/tft_wrapper.py +2 -4
  163. mindspore/nn/optim/thor.py +0 -2
  164. mindspore/nn/probability/bijector/bijector.py +7 -8
  165. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  166. mindspore/nn/probability/bijector/power_transform.py +20 -21
  167. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  168. mindspore/nn/probability/bijector/softplus.py +13 -14
  169. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  170. mindspore/nn/wrap/cell_wrapper.py +39 -5
  171. mindspore/nn/wrap/grad_reducer.py +4 -89
  172. mindspore/numpy/array_creations.py +4 -4
  173. mindspore/numpy/fft.py +9 -9
  174. mindspore/numpy/utils_const.py +1 -1
  175. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  176. mindspore/onnx/onnx_export.py +137 -0
  177. mindspore/opencv_core4110.dll +0 -0
  178. mindspore/opencv_imgcodecs4110.dll +0 -0
  179. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  180. mindspore/ops/__init__.py +2 -0
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  182. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  183. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  184. mindspore/ops/_op_impl/cpu/__init__.py +1 -5
  185. mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
  186. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
  187. mindspore/ops/auto_generate/gen_extend_func.py +6 -11
  188. mindspore/ops/auto_generate/gen_ops_def.py +385 -154
  189. mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
  190. mindspore/ops/communication.py +97 -0
  191. mindspore/ops/composite/__init__.py +5 -2
  192. mindspore/ops/composite/base.py +16 -2
  193. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  194. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  195. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  196. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  197. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  198. mindspore/ops/function/__init__.py +2 -0
  199. mindspore/ops/function/array_func.py +24 -18
  200. mindspore/ops/function/comm_func.py +3883 -0
  201. mindspore/ops/function/debug_func.py +7 -6
  202. mindspore/ops/function/grad/grad_func.py +4 -12
  203. mindspore/ops/function/math_func.py +89 -86
  204. mindspore/ops/function/nn_func.py +92 -313
  205. mindspore/ops/function/random_func.py +9 -18
  206. mindspore/ops/functional.py +4 -1
  207. mindspore/ops/functional_overload.py +377 -30
  208. mindspore/ops/operations/__init__.py +2 -5
  209. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  210. mindspore/ops/operations/_inner_ops.py +12 -50
  211. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  212. mindspore/ops/operations/array_ops.py +5 -50
  213. mindspore/ops/operations/comm_ops.py +95 -17
  214. mindspore/ops/operations/custom_ops.py +237 -22
  215. mindspore/ops/operations/debug_ops.py +33 -35
  216. mindspore/ops/operations/manually_defined/ops_def.py +39 -318
  217. mindspore/ops/operations/math_ops.py +5 -5
  218. mindspore/ops/operations/nn_ops.py +3 -3
  219. mindspore/ops/operations/sparse_ops.py +0 -83
  220. mindspore/ops/primitive.py +4 -27
  221. mindspore/ops/tensor_method.py +88 -10
  222. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  223. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  224. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  225. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  226. mindspore/ops_generate/common/gen_constants.py +11 -10
  227. mindspore/ops_generate/common/op_proto.py +18 -1
  228. mindspore/ops_generate/common/template.py +102 -245
  229. mindspore/ops_generate/common/template_utils.py +212 -0
  230. mindspore/ops_generate/gen_custom_ops.py +69 -0
  231. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  232. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  233. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  234. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  235. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  236. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  237. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  238. mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
  239. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  240. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  241. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  242. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  243. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  244. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  245. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  246. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  247. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  248. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  249. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  250. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  251. mindspore/parallel/_auto_parallel_context.py +5 -15
  252. mindspore/parallel/_cell_wrapper.py +1 -1
  253. mindspore/parallel/_parallel_serialization.py +4 -6
  254. mindspore/parallel/_ps_context.py +2 -2
  255. mindspore/parallel/_utils.py +34 -17
  256. mindspore/parallel/auto_parallel.py +23 -9
  257. mindspore/parallel/checkpoint_transform.py +20 -2
  258. mindspore/parallel/cluster/process_entity/_api.py +28 -33
  259. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  260. mindspore/parallel/cluster/run.py +5 -3
  261. mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
  262. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  263. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  264. mindspore/parallel/function/reshard_func.py +6 -5
  265. mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
  266. mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
  267. mindspore/parallel/shard.py +7 -21
  268. mindspore/parallel/strategy.py +336 -0
  269. mindspore/parallel/transform_safetensors.py +127 -20
  270. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
  271. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
  272. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  273. mindspore/profiler/common/constant.py +5 -0
  274. mindspore/profiler/common/file_manager.py +9 -0
  275. mindspore/profiler/common/msprof_cmd_tool.py +40 -4
  276. mindspore/profiler/common/path_manager.py +65 -24
  277. mindspore/profiler/common/profiler_context.py +27 -14
  278. mindspore/profiler/common/profiler_info.py +3 -3
  279. mindspore/profiler/common/profiler_meta_data.py +1 -0
  280. mindspore/profiler/common/profiler_op_analyse.py +10 -6
  281. mindspore/profiler/common/profiler_path_manager.py +13 -0
  282. mindspore/profiler/common/util.py +30 -3
  283. mindspore/profiler/dynamic_profiler.py +91 -46
  284. mindspore/profiler/envprofiler.py +30 -5
  285. mindspore/profiler/experimental_config.py +18 -2
  286. mindspore/profiler/platform/cpu_profiler.py +10 -4
  287. mindspore/profiler/platform/npu_profiler.py +34 -7
  288. mindspore/profiler/profiler.py +193 -145
  289. mindspore/profiler/profiler_action_controller.py +1 -1
  290. mindspore/profiler/profiler_interface.py +2 -2
  291. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  292. mindspore/run_check/_check_version.py +108 -24
  293. mindspore/runtime/__init__.py +9 -6
  294. mindspore/runtime/executor.py +35 -0
  295. mindspore/runtime/memory.py +113 -0
  296. mindspore/runtime/thread_bind_core.py +1 -1
  297. mindspore/swresample-4.dll +0 -0
  298. mindspore/swscale-6.dll +0 -0
  299. mindspore/tinyxml2.dll +0 -0
  300. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  301. mindspore/tools/data_dump.py +130 -0
  302. mindspore/tools/sdc_detect.py +91 -0
  303. mindspore/tools/stress_detect.py +63 -0
  304. mindspore/train/__init__.py +6 -6
  305. mindspore/train/_utils.py +8 -21
  306. mindspore/train/amp.py +6 -7
  307. mindspore/train/callback/_callback.py +2 -1
  308. mindspore/train/callback/_checkpoint.py +1 -17
  309. mindspore/train/callback/_flops_collector.py +10 -6
  310. mindspore/train/callback/_train_fault_tolerance.py +72 -25
  311. mindspore/train/data_sink.py +5 -9
  312. mindspore/train/dataset_helper.py +5 -5
  313. mindspore/train/model.py +41 -230
  314. mindspore/train/serialization.py +160 -401
  315. mindspore/train/train_thor/model_thor.py +2 -2
  316. mindspore/turbojpeg.dll +0 -0
  317. mindspore/utils/__init__.py +6 -3
  318. mindspore/utils/dlpack.py +92 -0
  319. mindspore/utils/dryrun.py +1 -1
  320. mindspore/utils/runtime_execution_order_check.py +10 -0
  321. mindspore/utils/sdc_detect.py +14 -12
  322. mindspore/utils/stress_detect.py +43 -0
  323. mindspore/utils/utils.py +152 -16
  324. mindspore/version.py +1 -1
  325. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  326. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
  327. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  328. mindspore/communication/_hccl_management.py +0 -297
  329. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
  330. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  331. mindspore/experimental/llm_boost/atb/__init__.py +0 -23
  332. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  333. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  334. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  335. mindspore/experimental/llm_boost/register.py +0 -130
  336. mindspore/experimental/llm_boost/utils.py +0 -31
  337. mindspore/include/OWNERS +0 -7
  338. mindspore/mindspore_cpu_res_manager.dll +0 -0
  339. mindspore/mindspore_ops_kernel_common.dll +0 -0
  340. mindspore/mindspore_res_manager.dll +0 -0
  341. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  342. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  343. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  344. mindspore/nn/reinforcement/tensor_array.py +0 -145
  345. mindspore/opencv_core452.dll +0 -0
  346. mindspore/opencv_imgcodecs452.dll +0 -0
  347. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  348. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  349. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  350. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  351. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  352. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  353. mindspore/ops/operations/_tensor_array.py +0 -359
  354. mindspore/ops/operations/rl_ops.py +0 -288
  355. mindspore/parallel/_offload_context.py +0 -275
  356. mindspore/parallel/_recovery_context.py +0 -115
  357. mindspore/parallel/_transformer/__init__.py +0 -35
  358. mindspore/parallel/_transformer/layers.py +0 -765
  359. mindspore/parallel/_transformer/loss.py +0 -251
  360. mindspore/parallel/_transformer/moe.py +0 -693
  361. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  362. mindspore/parallel/_transformer/transformer.py +0 -3124
  363. mindspore/parallel/mpi/_mpi_config.py +0 -116
  364. mindspore/profiler/common/validator/validate_path.py +0 -84
  365. mindspore/train/memory_profiling_pb2.py +0 -298
  366. mindspore/utils/hooks.py +0 -81
  367. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  368. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  369. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  370. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
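Before the hunks below: the version bump itself shows up in the version.py and run_check/_check_version.py entries above. A quick way to confirm which build is actually installed after upgrading is MindSpore's own self-check; this is a minimal sketch, and the printed output depends on the installed backend:

    import mindspore as ms

    # Prints the installed MindSpore version and runs a small sanity computation.
    ms.run_check()
    print(ms.__version__)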
mindspore/nn/optim/adam.py

@@ -1,4 +1,4 @@
- # Copyright 2020-2022 Huawei Technologies Co., Ltd
+ # Copyright 2020-2021 Huawei Technologies Co., Ltd
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -28,7 +28,6 @@ from mindspore.common.tensor import Tensor
  from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
- from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer
  from mindspore.common._decorator import deprecated

  _adam_opt = C.MultitypeFuncGraph("adam_opt")
@@ -727,7 +726,6 @@ class Adam(Optimizer):
  self.opt = P.Adam(use_locking, use_nesterov)
  self.sparse_opt = P.FusedSparseLazyAdam(use_locking, use_nesterov)
  self.sparse_opt.set_device("CPU")
- self._init_distributed_opts(use_locking, use_nesterov)

  else:
  self._is_device = True
@@ -737,7 +735,6 @@ class Adam(Optimizer):
  self.opt = P.Adam(use_locking, use_nesterov)
  self.sparse_opt = P.FusedSparseAdam(use_locking, use_nesterov)
  self.sparse_opt.set_device("CPU")
- self._init_distributed_opts(use_locking, use_nesterov)

  def _apply_adam(self, params, beta1_power, beta2_power, moment1, moment2, lr, gradients):
  """Execute Adam optimizer and its variants."""
@@ -750,83 +747,44 @@ class Adam(Optimizer):
  self.beta2, self.eps, lr), gradients, params, moment1, moment2)
  # Lazy adam or normal adam
  else:
- if self.use_dist_optimizer:
- if self.use_dist_optimizer and self.use_amsgrad:
- raise ValueError(f"Adam with amsgrad is currently not supporting distributed training!"
- f"Please set use_amsgrad=False for distributed training.")
- if self.is_group_lr:
- if self.use_lazy:
- success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps),
- lr, gradients, self._parameters, self.moment1, self.moment2,
- self.dense_lazyadam_opts,
- self.use_dense_opt_flags, self.sparse_lazyadam_opts,
- self.use_sparse_opt_flags)
- # Normal Adam
- else:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt, self.use_locking,
- self.use_nesterov, self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps),
- lr, gradients, params, moment1, moment2,
- self.dense_adam_opts, self.use_dense_opt_flags,
- self.sparse_adam_opts, self.use_sparse_opt_flags)
+ if self.is_group_lr:
+ if self.use_lazy:
+ success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
+ self.use_locking, self.use_nesterov,
+ self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
+ self.eps), lr, gradients, params, moment1, moment2)
  else:
- if self.use_lazy:
- success = self.map_reverse(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power, self.beta1,
- self.beta2, self.eps, lr), gradients, self._parameters,
- self.moment1, self.moment2,
- self.dense_lazyadam_opts, self.use_dense_opt_flags,
- self.sparse_lazyadam_opts, self.use_sparse_opt_flags)
+ if self.use_amsgrad:
+ success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
+ self.use_locking, self.use_nesterov,
+ self._is_device, beta1_power, beta2_power,
+ self.beta1, self.beta2, self.eps), lr, gradients, params,
+ moment1, moment2, self.vhat)
  else:
  success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
  self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
- self.eps, lr), gradients, params, moment1, moment2,
- self.dense_adam_opts,
- self.use_dense_opt_flags, self.sparse_adam_opts, self.use_sparse_opt_flags)
+ self._is_device, beta1_power, beta2_power,
+ self.beta1, self.beta2, self.eps), lr, gradients, params,
+ moment1, moment2)
  else:
- if self.is_group_lr:
- if self.use_lazy:
- success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
- self.eps), lr, gradients, params, moment1, moment2)
- else:
- if self.use_amsgrad:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps), lr, gradients, params,
- moment1, moment2, self.vhat)
- else:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps), lr, gradients, params,
- moment1, moment2)
+ if self.use_lazy:
+ success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
+ self.use_locking, self.use_nesterov,
+ self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
+ self.eps, lr), gradients, params, moment1, moment2)
  else:
- if self.use_lazy:
- success = self.map_(F.partial(_lazy_adam_opt, self.opt, self.sparse_opt,
+ if self.use_amsgrad:
+ success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
  self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power, self.beta1, self.beta2,
- self.eps, lr), gradients, params, moment1, moment2)
+ self._is_device, beta1_power, beta2_power,
+ self.beta1, self.beta2, self.eps, lr), gradients, params,
+ moment1, moment2, self.vhat)
  else:
- if self.use_amsgrad:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps, lr), gradients, params,
- moment1, moment2, self.vhat)
- else:
- success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
- self.use_locking, self.use_nesterov,
- self._is_device, beta1_power, beta2_power,
- self.beta1, self.beta2, self.eps, lr), gradients, params,
- moment1, moment2)
+ success = self.map_(F.partial(_adam_opt, self.opt, self.sparse_opt,
+ self.use_locking, self.use_nesterov,
+ self._is_device, beta1_power, beta2_power,
+ self.beta1, self.beta2, self.eps, lr), gradients, params,
+ moment1, moment2)

  return success

@@ -835,7 +793,6 @@ class Adam(Optimizer):
  params = self._parameters
  moment1 = self.moment1
  moment2 = self.moment2
- gradients = self.flatten_gradients(gradients)
  gradients = self.decay_weight(gradients)
  if not self.use_offload:
  gradients = self.gradients_centralization(gradients)
@@ -859,13 +816,6 @@ class Adam(Optimizer):
  """
  self._set_base_target(value)

- def _init_distributed_opts(self, use_locking, use_nesterov):
- self.use_dist_optimizer = self._use_distibuted_optimizer()
- self.dense_adam_opts, self.use_dense_opt_flags = \
- self._get_distributed_optimizer_list("adam", use_locking, use_nesterov)
- self.sparse_adam_opts, self.use_sparse_opt_flags = \
- self._get_distributed_optimizer_list("fused_sparse_adam", use_locking, use_nesterov)
-

  class AdamWeightDecay(Optimizer):
  r"""
@@ -909,9 +859,7 @@ class AdamWeightDecay(Optimizer):
  Note:
  There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
- As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means, refer
- document `LossScale <https://www.mindspore.cn/tutorials/en/master/beginner/mixed_precision.html>`_ to
- process `loss_scale` correctly.
+ As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means.

  If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
  'beta' or 'gamma' in their names. Users can group parameters to change the strategy of decaying weight. When
@@ -1030,11 +978,9 @@ class AdamWeightDecay(Optimizer):

  @jit(backend="ms_backend")
  def construct(self, gradients):
- gradients = self.flatten_gradients(gradients)
  weight_decay = self.get_weight_decay()
  lr = self.get_lr()
  self.assignadd(self.global_step, self.global_step_increase_tensor)
-
  if self.use_fused_opt:
  if self.is_group:
  if self.is_group_lr:
@@ -1072,19 +1018,6 @@ class AdamWeightDecay(Optimizer):

  return optim_result

- @Optimizer.target.setter
- def target(self, value):
- """
- If the input value is set to "CPU", the parameters will be updated on the host using the Fused
- optimizer operation.
- """
- self._set_base_target(value)
- if value == 'CPU':
- self.fused_opt.set_device("CPU")
- self.use_fused_opt = True
- else:
- self.use_fused_opt = False
-

  class AdamOffload(Optimizer):
  r"""
@@ -1253,7 +1186,6 @@ class AdamOffload(Optimizer):
  params = self._parameters
  moment1 = self.moment1
  moment2 = self.moment2
- gradients = self.flatten_gradients(gradients)
  gradients = self.decay_weight(gradients)
  gradients = self.scale_grad(gradients)
  lr = self.get_lr()
@@ -1272,30 +1204,3 @@ class AdamOffload(Optimizer):
  beta1_power, beta2_power, self.beta1, self.beta2, self.eps, lr),
  gradients, params, moment1, moment2)
  return success
-
-
- def create_distributed_adam(*args, **kwargs):
- """
- Create the distributed Adam op.
- """
- adam = P.Adam(*args, **kwargs)
- adam.add_prim_attr("gradient_type", "dense_gradient")
- adam.add_prim_attr("parameter_input_index", 0)
- adam.add_prim_attr("gradient_input_index", 9)
- return adam
-
-
- def create_distributed_fused_sparse_adam(*args, **kwargs):
- """
- Create the distributed FusedSparseAdam op.
- """
- sparse_adam = P.FusedSparseAdam(*args, **kwargs)
- sparse_adam.add_prim_attr("gradient_type", "sparse_gradient")
- sparse_adam.add_prim_attr("parameter_input_index", 0)
- sparse_adam.add_prim_attr("gradient_input_index", 9)
- sparse_adam.add_prim_attr("indices_input_index", 10)
- return sparse_adam
-
-
- _register_dist_optimizer("adam", create_distributed_adam)
- _register_dist_optimizer("fused_sparse_adam", create_distributed_fused_sparse_adam)
mindspore/nn/optim/adamax.py

@@ -202,7 +202,6 @@ class AdaMax(Optimizer):

  @jit
  def construct(self, gradients):
- gradients = self.flatten_gradients(gradients)
  gradients = self.decay_weight(gradients)
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)

mindspore/nn/optim/asgd.py

@@ -184,7 +184,6 @@ class ASGD(Optimizer):

  @jit(backend="ms_backend")
  def construct(self, gradients):
- gradients = self.flatten_gradients(gradients)
  gradients = self.decay_weight(gradients)
  gradients = self.gradients_centralization(gradients)
  gradients = self.scale_grad(gradients)
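The same flatten_gradients line is deleted from every optimizer's construct in this release (Adam, AdaMax, ASGD, FTRL, Lamb, LARS), which tracks the removal of the flattened-parameter path elsewhere in the wheel. As a rough illustration of the preprocessing pipeline that remains, here is a hypothetical toy optimizer that keeps only the steps still visible in the hunks; the class and its update rule are invented for illustration and are not part of MindSpore:

    import mindspore as ms
    from mindspore import nn, ops

    class PlainSGD(nn.Optimizer):
        """Toy optimizer mirroring the trimmed construct() pipeline:
        decay_weight -> gradients_centralization -> scale_grad -> get_lr,
        with no flatten_gradients() step in front."""

        def __init__(self, params, learning_rate=0.01):
            super().__init__(learning_rate, params)

        def construct(self, gradients):
            gradients = self.decay_weight(gradients)
            gradients = self.gradients_centralization(gradients)
            gradients = self.scale_grad(gradients)
            lr = self.get_lr()
            for param, grad in zip(self._parameters, gradients):
                ops.assign_sub(param, lr * grad)
            return True

    net = nn.Dense(3, 1)
    opt = PlainSGD(net.trainable_params(), learning_rate=0.1)
    grads = tuple(ops.zeros_like(p) for p in net.trainable_params())
    opt(grads)    # one (no-op) update with zero gradients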
mindspore/nn/optim/ftrl.py

@@ -21,27 +21,10 @@ from mindspore.common.api import jit
  from mindspore import _checkparam as validator
  from mindspore.nn.optim.optimizer import Optimizer
  from mindspore.nn.optim.optimizer import opt_init_args_register
- from mindspore.nn.optim._dist_optimizer_registry import _register_dist_optimizer

  _ftrl_opt = C.MultitypeFuncGraph("ftrl_opt")


- @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
- "RowTensor", "Tensor", "Tensor", "Bool", "Function", "Bool", "Function", "Bool")
- def _tensor_run_opt_with_sparse_dist(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable,
- distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
- """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
- success = True
- indices = gradient.indices
- values = gradient.values
- if use_sparse_flag:
- success = F.depend(success, distributed_sparse_opt(weight, moment, linear, values, indices))
- else:
- success = F.depend(success, spars_opt(weight, moment, linear, values, indices))
- return success
-
-
  def _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values):
  """Apllpy ftrl optimizer for map parameter"""
  success = True
@@ -78,43 +61,10 @@ def _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, mome
  return success


- @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "MapTensor",
- "MapTensor", "MapTensor", "MapTensor", "Bool", "Function", "Bool", "Function", "Bool")
- def _run_map_tensor_opt_with_sparse_dist(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable,
- distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
- """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
- success = True
- indices, values = gradient.get_data()
- if use_sparse_flag:
- # PS Mode.
- success = F.depend(success, distributed_sparse_opt(weight, moment, linear, values, indices))
- elif cache_enable:
- # PS Cache mode.
- _apply_map_tensor_ftrl(l1, l2, lr_power, learning_rate, linear, weight, moment, indices, values)
- else:
- raise Exception("Unexpected mode for distributed optimizer.")
- return success
-
-
- @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
- "Tensor", "Tensor", "Tensor", "Bool", "Function", "Bool", "Function", "Bool")
- def _tensor_run_opt_dist(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable,
- distributed_opt, use_flag, distributed_sparse_opt, use_sparse_flag):
- """Apply ftrl optimizer to the weight parameter."""
- success = True
- if use_flag:
- success = F.depend(success, distributed_opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
- else:
- success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
- return success
-
-
  @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
- "RowTensor", "Tensor", "Tensor", "Bool")
+ "RowTensor", "Tensor", "Tensor")
  def _tensor_run_opt_with_sparse(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable):
+ gradient, weight, moment):
  """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
  success = True
  indices = gradient.indices
@@ -124,9 +74,9 @@ def _tensor_run_opt_with_sparse(opt, spars_opt, l1, l2, lr_power, learning_rate,


  @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "MapTensor",
- "MapTensor", "MapTensor", "MapTensor", "Bool")
+ "MapTensor", "MapTensor", "MapTensor")
  def _run_map_tensor_opt_with_sparse(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable):
+ gradient, weight, moment):
  """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
  success = True
  indices, values = gradient.get_data()
@@ -135,9 +85,9 @@ def _run_map_tensor_opt_with_sparse(opt, spars_opt, l1, l2, lr_power, learning_r


  @_ftrl_opt.register("Function", "Function", "Number", "Number", "Number", "Tensor", "Tensor",
- "Tensor", "Tensor", "Tensor", "Bool")
+ "Tensor", "Tensor", "Tensor")
  def _tensor_run_opt(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
- gradient, weight, moment, cache_enable):
+ gradient, weight, moment):
  """Apply ftrl optimizer to the weight parameter."""
  success = True
  success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
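The surviving _ftrl_opt overloads drop their trailing "Bool" type tag because the cache_enable argument is gone from their signatures. For readers unfamiliar with this dispatch mechanism, below is a small self-contained sketch of MultitypeFuncGraph; the registration API is MindSpore's, while the function names and overloads are illustrative only:

    from mindspore import Tensor, ops
    from mindspore.ops import MultitypeFuncGraph

    # Overloads are selected by the tuple of type tags passed to register();
    # removing an argument from the implementation means removing its tag here too.
    apply_op = MultitypeFuncGraph("apply_op")

    @apply_op.register("Number", "Number")
    def _apply_scalars(x, y):
        return x + y

    @apply_op.register("Tensor", "Tensor")
    def _apply_tensors(x, y):
        return ops.add(x, y)

    print(apply_op(1, 2))                                    # dispatches to the scalar overload
    print(apply_op(Tensor([1.0, 2.0]), Tensor([3.0, 4.0])))  # dispatches to the tensor overload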
@@ -320,14 +270,11 @@ class FTRL(Optimizer):
  self.use_locking = use_locking
  self.sparse_opt = P.SparseApplyFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)

- self._init_distributed_opts(use_locking, learning_rate, l1, l2, lr_power)
-
  @jit
  def construct(self, grads):
  params = self._parameters
  moments = self.moments
  linear = self.linear
- grads = self.flatten_gradients(grads)
  grads = self.decay_weight(grads)
  grads = self.gradients_centralization(grads)
  grads = self.scale_grad(grads)
@@ -335,14 +282,8 @@ class FTRL(Optimizer):
  lr = self.get_lr()
  self.assignadd(self.global_step, self.global_step_increase_tensor)

- if self.use_dist_optimizer:
- success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self.l1, self.l2, self.lr_power, lr),
- linear, grads, params, moments, self.cache_enable,
- self.distributed_opts, self.use_distributed_opt_flags,
- self.distributed_sparse_opts, self.use_distributed_sparse_opt_flags)
- else:
- success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self.l1, self.l2, self.lr_power, lr),
- linear, grads, params, moments, self.cache_enable)
+ success = self.map_(F.partial(_ftrl_opt, self.opt, self.sparse_opt, self.l1, self.l2, self.lr_power, lr),
+ linear, grads, params, moments)
  return success

  @Optimizer.target.setter
@@ -366,38 +307,3 @@ class FTRL(Optimizer):
  self.sparse_opt = P.SparseApplyFtrl(self.lr, self.l1, self.l2, self.lr_power, self.use_locking)

  self._target = value
-
- def _init_distributed_opts(self, use_locking, learning_rate, l1, l2, lr_power):
- self.use_dist_optimizer = self._use_distibuted_optimizer()
- self.distributed_opts, self.use_distributed_opt_flags =\
- self._get_distributed_optimizer_list("ftrl", use_locking=use_locking)
- self.distributed_sparse_opts, self.use_distributed_sparse_opt_flags =\
- self._get_distributed_optimizer_list("fused_sparse_ftrl", learning_rate,
- l1, l2, lr_power, use_locking=use_locking)
-
-
- def create_distributed_ftrl(*args, **kwargs):
- """
- Create the distributed ApplyFtrl op.
- """
- ftrl = P.ApplyFtrl(*args, **kwargs)
- ftrl.add_prim_attr("gradient_type", "dense_gradient")
- ftrl.add_prim_attr("parameter_input_index", 0)
- ftrl.add_prim_attr("gradient_input_index", 3)
- return ftrl
-
-
- def create_distributed_fused_sparse_ftrl(*args, **kwargs):
- """
- Create the distributed FusedSparseFtrl op.
- """
- sparse_ftrl = P.FusedSparseFtrl(*args, **kwargs)
- sparse_ftrl.add_prim_attr("gradient_type", "sparse_gradient")
- sparse_ftrl.add_prim_attr("parameter_input_index", 0)
- sparse_ftrl.add_prim_attr("gradient_input_index", 3)
- sparse_ftrl.add_prim_attr("indices_input_index", 4)
- return sparse_ftrl
-
-
- _register_dist_optimizer("ftrl", create_distributed_ftrl)
- _register_dist_optimizer("fused_sparse_ftrl", create_distributed_fused_sparse_ftrl)
mindspore/nn/optim/lamb.py

@@ -131,9 +131,7 @@ class Lamb(Optimizer):
  Note:
  There is usually no connection between a optimizer and mixed precision. But when `FixedLossScaleManager` is used
  and `drop_overflow_update` in `FixedLossScaleManager` is set to False, optimizer needs to set the 'loss_scale'.
- As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means. Refer
- document `LossScale <https://www.mindspore.cn/tutorials/en/master/beginner/mixed_precision.html>`_ to
- process `loss_scale` correctly.
+ As this optimizer has no argument of `loss_scale`, so `loss_scale` needs to be processed by other means.

  If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
  'beta' or 'gamma' in their names. Users can group parameters to change the strategy of decaying weight. When
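The docstring edit above (also applied to AdamWeightDecay earlier in the diff) drops the tutorial link but keeps the advice that Lamb has no loss_scale argument, so scaling must be handled outside the optimizer. One way to do that, sketched here with mindspore.amp.DynamicLossScaler; the network, data flow and scale values are placeholders:

    import mindspore as ms
    from mindspore import nn
    from mindspore.amp import DynamicLossScaler

    net = nn.Dense(16, 4)                    # placeholder network
    loss_fn = nn.MSELoss()
    opt = nn.Lamb(net.trainable_params(), learning_rate=1e-3)
    scaler = DynamicLossScaler(scale_value=2**12, scale_factor=2, scale_window=1000)

    def forward(x, y):
        loss = loss_fn(net(x), y)
        return scaler.scale(loss)            # scale the loss, not the optimizer

    grad_fn = ms.value_and_grad(forward, None, opt.parameters)

    def train_step(x, y):
        loss, grads = grad_fn(x, y)
        grads = scaler.unscale(grads)        # undo the scaling before the update
        opt(grads)
        return scaler.unscale(loss)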
@@ -271,7 +269,6 @@ class Lamb(Optimizer):
  lr = self.get_lr()
  self.assignadd(self.global_step, self.global_step_increase_tensor)
  lamb_opt = _lamb_opt
- gradients = self.flatten_gradients(gradients)
  gradients = self.gradients_centralization(gradients)
  if self.is_group:
  if self.is_group_lr:
mindspore/nn/optim/lars.py

@@ -125,8 +125,6 @@ class LARS(Optimizer):
  self.weight_decay = optimizer.weight_decay
  self.global_step = optimizer.global_step
  self.parameters = optimizer.parameters
- if optimizer._use_flattened_params: # pylint: disable=W0212
- self.opt._use_flattened_params = False # pylint: disable=W0212
  self._user_parameters += [param.name for param in self.parameters]
  self.use_clip = use_clip
  self.lars_flag = tuple(lars_filter(x) for x in self.parameters)
@@ -173,7 +171,6 @@ class LARS(Optimizer):
  @jit
  def construct(self, gradients):
  params = self.parameters
- gradients = self.flatten_gradients(gradients)
  if self.use_clip:
  lr = self._get_lr()
  else:
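The deleted lines above were the last consumers of the flattened-parameter flag (_use_flattened_params), consistent with the flatten_gradients removals earlier in the diff. nn.LARS is still used the same way, as a wrapper around another optimizer; a minimal sketch with placeholder values:

    from mindspore import nn

    net = nn.Dense(32, 10)                    # placeholder network
    base_opt = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    # LARS wraps a base optimizer and rescales its learning rate per layer;
    # parameters matching lars_filter are excluded from the LARS scaling.
    opt = nn.LARS(base_opt, epsilon=1e-5, coefficient=0.001,
                  lars_filter=lambda p: 'bias' not in p.name)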