mindspore 2.6.0rc1__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic according to the registry's advisory.

Files changed (458)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +65 -84
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +58 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +178 -53
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +377 -203
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +5 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +117 -131
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +67 -55
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +70 -24
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +27 -7
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +6 -46
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +429 -23
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +140 -104
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +491 -623
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +117 -110
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +4 -6
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/asgd.py +2 -0
  188. mindspore/nn/optim/lamb.py +1 -3
  189. mindspore/nn/optim/optimizer.py +1 -1
  190. mindspore/nn/optim/tft_wrapper.py +2 -3
  191. mindspore/nn/optim/thor.py +2 -2
  192. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  193. mindspore/nn/probability/distribution/exponential.py +2 -1
  194. mindspore/nn/probability/distribution/poisson.py +2 -1
  195. mindspore/nn/sparse/sparse.py +3 -3
  196. mindspore/nn/wrap/cell_wrapper.py +73 -42
  197. mindspore/nn/wrap/grad_reducer.py +37 -52
  198. mindspore/nn/wrap/loss_scale.py +72 -74
  199. mindspore/numpy/array_creations.py +7 -7
  200. mindspore/numpy/fft.py +1 -1
  201. mindspore/numpy/math_ops.py +5 -5
  202. mindspore/numpy/utils_const.py +1 -1
  203. mindspore/opencv_core452.dll +0 -0
  204. mindspore/opencv_imgcodecs452.dll +0 -0
  205. mindspore/opencv_imgproc452.dll +0 -0
  206. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  207. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  208. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  209. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  210. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  211. mindspore/ops/_vmap/vmap_array_ops.py +31 -13
  212. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  213. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +54 -13
  214. mindspore/ops/auto_generate/gen_extend_func.py +27 -145
  215. mindspore/ops/auto_generate/gen_ops_def.py +1027 -347
  216. mindspore/ops/auto_generate/gen_ops_prim.py +2341 -1117
  217. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  218. mindspore/ops/composite/__init__.py +10 -0
  219. mindspore/ops/composite/base.py +9 -5
  220. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  221. mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
  222. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  223. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  224. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  225. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  226. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  227. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  228. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  229. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  230. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  231. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  232. mindspore/ops/function/__init__.py +4 -1
  233. mindspore/ops/function/_add_attr_func.py +11 -6
  234. mindspore/ops/function/array_func.py +19 -102
  235. mindspore/ops/function/debug_func.py +8 -5
  236. mindspore/ops/function/grad/grad_func.py +5 -13
  237. mindspore/ops/function/math_func.py +77 -572
  238. mindspore/ops/function/nn_func.py +46 -94
  239. mindspore/ops/function/other_func.py +4 -1
  240. mindspore/ops/function/random_func.py +44 -5
  241. mindspore/ops/function/vmap_func.py +2 -1
  242. mindspore/ops/functional.py +4 -4
  243. mindspore/ops/functional_overload.py +594 -18
  244. mindspore/ops/op_info_register.py +21 -0
  245. mindspore/ops/operations/__init__.py +16 -11
  246. mindspore/ops/operations/_custom_ops_utils.py +689 -34
  247. mindspore/ops/operations/_inner_ops.py +14 -18
  248. mindspore/ops/operations/_sequence_ops.py +1 -1
  249. mindspore/ops/operations/array_ops.py +5 -51
  250. mindspore/ops/operations/comm_ops.py +186 -41
  251. mindspore/ops/operations/custom_ops.py +303 -177
  252. mindspore/ops/operations/debug_ops.py +59 -4
  253. mindspore/ops/operations/image_ops.py +13 -13
  254. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  255. mindspore/ops/operations/math_ops.py +8 -9
  256. mindspore/ops/operations/nn_ops.py +8 -40
  257. mindspore/ops/primitive.py +9 -20
  258. mindspore/ops/tensor_method.py +63 -15
  259. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  260. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  261. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  262. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  263. mindspore/ops_generate/common/base_generator.py +14 -0
  264. mindspore/ops_generate/common/gen_constants.py +8 -3
  265. mindspore/ops_generate/common/gen_utils.py +0 -19
  266. mindspore/ops_generate/common/op_proto.py +11 -4
  267. mindspore/ops_generate/common/template.py +88 -11
  268. mindspore/ops_generate/gen_ops.py +1 -1
  269. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  270. mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
  271. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  272. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  273. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  274. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  275. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  276. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  277. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  278. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  279. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  280. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  281. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  282. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  283. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  284. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  285. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  286. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  287. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  288. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  289. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  290. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  291. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  292. mindspore/parallel/_auto_parallel_context.py +16 -23
  293. mindspore/parallel/_cell_wrapper.py +113 -45
  294. mindspore/parallel/_parallel_serialization.py +4 -3
  295. mindspore/parallel/_ps_context.py +4 -6
  296. mindspore/parallel/_tensor.py +167 -12
  297. mindspore/parallel/_transformer/moe.py +1 -1
  298. mindspore/parallel/_transformer/transformer.py +17 -12
  299. mindspore/parallel/_utils.py +5 -11
  300. mindspore/parallel/auto_parallel.py +35 -14
  301. mindspore/parallel/checkpoint_convert.py +3 -3
  302. mindspore/parallel/checkpoint_transform.py +13 -7
  303. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  304. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  305. mindspore/parallel/cluster/run.py +48 -7
  306. mindspore/parallel/function/__init__.py +8 -1
  307. mindspore/parallel/function/reshard_func.py +12 -12
  308. mindspore/parallel/nn/__init__.py +15 -2
  309. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  310. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  311. mindspore/parallel/shard.py +10 -25
  312. mindspore/parallel/transform_safetensors.py +469 -174
  313. mindspore/pgodb140.dll +0 -0
  314. mindspore/pgort140.dll +0 -0
  315. mindspore/profiler/__init__.py +2 -1
  316. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  317. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  318. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
  319. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  321. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  322. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  323. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  324. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  325. mindspore/profiler/analysis/task_manager.py +1 -1
  326. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  327. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  328. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  329. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  330. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  331. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  332. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  333. mindspore/profiler/common/constant.py +16 -0
  334. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  335. mindspore/profiler/common/path_manager.py +9 -0
  336. mindspore/profiler/common/profiler_context.py +50 -29
  337. mindspore/profiler/common/profiler_info.py +0 -16
  338. mindspore/profiler/common/profiler_meta_data.py +1 -0
  339. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  340. mindspore/profiler/common/profiler_output_path.py +23 -8
  341. mindspore/profiler/common/profiler_parameters.py +128 -35
  342. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  343. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  344. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  345. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  346. mindspore/profiler/dynamic_profiler.py +374 -338
  347. mindspore/profiler/envprofiler.py +42 -12
  348. mindspore/profiler/experimental_config.py +112 -7
  349. mindspore/profiler/mstx.py +33 -12
  350. mindspore/profiler/platform/__init__.py +2 -3
  351. mindspore/profiler/platform/cpu_profiler.py +10 -4
  352. mindspore/profiler/platform/npu_profiler.py +30 -20
  353. mindspore/profiler/profiler.py +218 -154
  354. mindspore/profiler/profiler_action_controller.py +65 -77
  355. mindspore/profiler/profiler_interface.py +2 -2
  356. mindspore/profiler/schedule.py +10 -4
  357. mindspore/rewrite/common/config.py +1 -0
  358. mindspore/rewrite/common/namer.py +1 -0
  359. mindspore/rewrite/common/namespace.py +1 -0
  360. mindspore/rewrite/node/node.py +31 -11
  361. mindspore/rewrite/parsers/assign_parser.py +1 -1
  362. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  363. mindspore/run_check/_check_version.py +7 -10
  364. mindspore/runtime/__init__.py +8 -6
  365. mindspore/runtime/event.py +10 -4
  366. mindspore/runtime/executor.py +87 -45
  367. mindspore/runtime/memory.py +31 -32
  368. mindspore/runtime/thread_bind_core.py +299 -165
  369. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  370. mindspore/swresample-4.dll +0 -0
  371. mindspore/swscale-6.dll +0 -0
  372. mindspore/tbbmalloc.dll +0 -0
  373. mindspore/tinyxml2.dll +0 -0
  374. mindspore/train/_utils.py +17 -7
  375. mindspore/train/amp.py +43 -23
  376. mindspore/train/callback/__init__.py +5 -5
  377. mindspore/train/callback/_callback.py +2 -1
  378. mindspore/train/callback/_checkpoint.py +4 -14
  379. mindspore/train/callback/_flops_collector.py +11 -7
  380. mindspore/train/callback/_landscape.py +0 -1
  381. mindspore/train/callback/_train_fault_tolerance.py +98 -21
  382. mindspore/train/data_sink.py +15 -6
  383. mindspore/train/dataset_helper.py +14 -5
  384. mindspore/train/model.py +133 -69
  385. mindspore/train/serialization.py +168 -126
  386. mindspore/train/summary/summary_record.py +13 -2
  387. mindspore/train/train_thor/model_thor.py +2 -2
  388. mindspore/turbojpeg.dll +0 -0
  389. mindspore/utils/__init__.py +3 -2
  390. mindspore/utils/dryrun.py +0 -6
  391. mindspore/utils/runtime_execution_order_check.py +163 -77
  392. mindspore/utils/sdc_detect.py +68 -0
  393. mindspore/utils/utils.py +14 -17
  394. mindspore/vcmeta.dll +0 -0
  395. mindspore/vcruntime140.dll +0 -0
  396. mindspore/vcruntime140_1.dll +0 -0
  397. mindspore/version.py +1 -1
  398. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  399. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/RECORD +403 -442
  400. mindspore/_deprecated/jit.py +0 -198
  401. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  402. mindspore/communication/_hccl_management.py +0 -297
  403. mindspore/experimental/es/embedding_service.py +0 -891
  404. mindspore/experimental/es/embedding_service_layer.py +0 -581
  405. mindspore/profiler/common/validator/__init__.py +0 -14
  406. mindspore/profiler/common/validator/validate_path.py +0 -84
  407. mindspore/profiler/parser/__init__.py +0 -14
  408. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  409. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  410. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  411. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  412. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  413. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  414. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  415. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  416. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  417. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  418. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  419. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  420. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  421. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  422. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  423. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  424. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  425. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  426. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  427. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  428. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  429. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  430. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  431. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  432. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  433. mindspore/profiler/parser/container.py +0 -229
  434. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  435. mindspore/profiler/parser/flops_parser.py +0 -531
  436. mindspore/profiler/parser/framework_enum.py +0 -111
  437. mindspore/profiler/parser/framework_parser.py +0 -464
  438. mindspore/profiler/parser/framework_struct.py +0 -61
  439. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  440. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  441. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  442. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  443. mindspore/profiler/parser/hccl_parser.py +0 -573
  444. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  445. mindspore/profiler/parser/integrator.py +0 -526
  446. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  447. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  448. mindspore/profiler/parser/minddata_parser.py +0 -186
  449. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  450. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  451. mindspore/profiler/parser/optime_parser.py +0 -250
  452. mindspore/profiler/parser/profiler_info.py +0 -213
  453. mindspore/profiler/parser/step_trace_parser.py +0 -666
  454. mindspore/utils/hooks.py +0 -81
  455. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  456. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  457. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  458. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
mindspore/experimental/optim/adamw.py
@@ -1,21 +1,10 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/adamw.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """adamw"""
 from __future__ import absolute_import
 
-from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
@@ -25,14 +14,14 @@ from mindspore.ops import auto_generate as gen
 from mindspore import ops
 from mindspore import jit
 
-_adamw_opt = C.MultitypeFuncGraph("adamw_opt")
-_speed_adamw_opt = C.MultitypeFuncGraph("speed_adamw_opt")
+_adamw_opt = ops.MultitypeFuncGraph("adamw_opt")
+_speed_adamw_opt = ops.MultitypeFuncGraph("speed_adamw_opt")
 
-op_mul = P.Mul()
-op_pow = P.Pow()
-op_sqrt = P.Sqrt()
-op_maximum = P.Maximum()
-hyper_map = C.HyperMap()
+op_mul = ops.Mul()
+op_pow = ops.Pow()
+op_sqrt = ops.Sqrt()
+op_maximum = ops.Maximum()
+hyper_map = ops.HyperMap()
 
 
 @_speed_adamw_opt.register("Function", "Float", "Float", "Tensor", "Float", "Float", "Bool", "Bool", "Tensor", "Tensor",
@@ -76,18 +65,18 @@ def _run_adamw_opt(weight_decay_new, step_size, amsgrad, eps, bias_correction2_s
     """Apply adamw optimizer to the weight parameter."""
     success = True
     next_param = op_mul(param, weight_decay_new)
-    F.assign(exp_avg, op_mul(exp_avg, beta1) + op_mul(grad, 1 - beta1))
-    F.assign(exp_avg_sq, ops.addcmul(op_mul(exp_avg_sq, beta2), grad, grad, 1 - beta2))
+    ops.assign(exp_avg, op_mul(exp_avg, beta1) + op_mul(grad, 1 - beta1))
+    ops.assign(exp_avg_sq, ops.addcmul(op_mul(exp_avg_sq, beta2), grad, grad, 1 - beta2))
 
     if amsgrad:
         next_max_exp_avg = op_maximum(max_exp_avg_sq, exp_avg_sq)
         denom = op_sqrt(next_max_exp_avg) / bias_correction2_sqrt + eps
-        F.assign(max_exp_avg_sq, next_max_exp_avg)
+        ops.assign(max_exp_avg_sq, next_max_exp_avg)
     else:
         denom = op_sqrt(exp_avg_sq) / bias_correction2_sqrt + eps
 
     return_param = next_param - op_mul(exp_avg / denom, step_size)
-    F.assign(param, return_param)
+    ops.assign(param, return_param)
     return success
 
 
@@ -129,6 +118,10 @@ class AdamW(Optimizer):
         &\rule{180mm}{0.4pt} \\[-1.ex]
         \end{aligned}
 
+    More details of the AdamW algorithm can be found in the paper `Decoupled Weight Decay Regularization
+    <https://arxiv.org/abs/1711.05101>`_ and `On the Convergence of Adam and Beyond
+    <https://openreview.net/forum?id=ryQu7f-RZ>`_.
+
     .. warning::
         This is an experimental optimizer API that is subject to change.
         This module must be used with lr scheduler module in `LRScheduler Class
@@ -205,16 +198,16 @@ class AdamW(Optimizer):
         self.max_exp_avg_sq = self.parameters.clone(prefix="max_exp_avg_sq", init='zeros')
         self.state_step = Parameter(Tensor(0, mstype.int32), "state_step")
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd = P.AssignAdd()
-        self.op_cast = P.Cast()
+        self.assignadd = ops.AssignAdd()
+        self.op_cast = ops.Cast()
 
     @jit
     def implementation(self, lr, weight_decay, beta1, beta2, amsgrad, eps, grads, start_id, end_id):
         """Extract the common computing part for acceleration"""
         weight_decay_new, step_size, bias_correction2_sqrt = prepare_func(lr, weight_decay,
                                                                           self.state_step, beta1, beta2)
-        self.hyper_map(F.partial(_adamw_opt, weight_decay_new, step_size, amsgrad,
-                                 eps, bias_correction2_sqrt, beta1, beta2),
+        self.hyper_map(ops.partial(_adamw_opt, weight_decay_new, step_size, amsgrad,
+                                   eps, bias_correction2_sqrt, beta1, beta2),
                        self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
                        self.exp_avg_sq[start_id: end_id], self.max_exp_avg_sq[start_id: end_id])
         return True
@@ -228,7 +221,8 @@ class AdamW(Optimizer):
             lr = self.lrs[group_id]
             if isinstance(group.get("lr"), float):
                 lr = self.op_cast(group.get("lr"), mstype.float32)
-            grads = tuple([grad if not group.get("maximize") else F.neg(grad) for grad in gradients[start_id: end_id]])
+            grads = tuple([grad if not group.get("maximize") else ops.neg(grad) \
+                           for grad in gradients[start_id:end_id]])
 
             self.implementation(lr, group.get("weight_decay"), beta1, beta2, group.get("amsgrad"), group.get("eps"),
                                 grads, start_id, end_id)
@@ -265,7 +259,7 @@ class SpeedAdamW(Optimizer):
         self.exp_avg_sq = self.parameters.clone(prefix="exp_avg_sq", init='zeros')
         self.state_step = Parameter(Tensor([0], mstype.float32), "state_step")
         self.increase_tensor = Tensor(1, mstype.float32)
-        self.assignadd = P.AssignAdd()
+        self.assignadd = ops.AssignAdd()
         self.adamw_opt = gen.ApplyAdamW()
 
     def construct(self, gradients):
@@ -285,9 +279,9 @@ class SpeedAdamW(Optimizer):
             if group.get("amsgrad"):
                 raise ValueError("For SpeedAdamW, the value of amsgrad can only be False.")
 
-            self.hyper_map(F.partial(_speed_adamw_opt, self.adamw_opt, beta1, beta2, lr,
-                                     group.get("eps"), group.get("weight_decay"),
-                                     group.get("amsgrad"), maximize, bias_correction1, bias_correction2),
+            self.hyper_map(ops.partial(_speed_adamw_opt, self.adamw_opt, beta1, beta2, lr,
+                                       group.get("eps"), group.get("weight_decay"),
+                                       group.get("amsgrad"), maximize, bias_correction1, bias_correction2),
                            self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
                            self.exp_avg_sq[start_id: end_id])
 
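The change repeated across the optimizer diffs in this section is the retirement of the legacy operator aliases (from mindspore.ops import functional as F, composite as C, operations as P) in favour of the unified mindspore.ops namespace. A minimal sketch of the equivalent calls under the new namespace, assuming only a standard MindSpore install (the tensor values are arbitrary):

import mindspore
from mindspore import ops, Tensor

# Primitive instances and composite helpers now come from mindspore.ops:
op_mul = ops.Mul()          # previously P.Mul()
hyper_map = ops.HyperMap()  # previously C.HyperMap()

# Functional-style calls such as F.assign become ops.assign:
x = mindspore.Parameter(Tensor(2.0, mindspore.float32), name="x")
ops.assign(x, op_mul(x, Tensor(3.0, mindspore.float32)))
print(x.asnumpy())  # expected: 6.0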
mindspore/experimental/optim/asgd.py
@@ -1,33 +1,23 @@
-# Copyright 2021-2022 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/asgd.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """asgd"""
 from __future__ import absolute_import
 
-from mindspore.ops import functional as F, composite as C, operations as P
+from mindspore import ops
 from mindspore.common import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_than, check_not_less_than_without_equal
 from mindspore.common.api import jit
 
-_asgd_opt = C.MultitypeFuncGraph("asgd_opt")
+_asgd_opt = ops.MultitypeFuncGraph("asgd_opt")
 
-op_cast = P.Cast()
-op_pow = P.Pow()
-op_maximum = P.Maximum()
-op_assign = P.Assign()
-op_assignadd = P.AssignAdd()
+op_cast = ops.Cast()
+op_pow = ops.Pow()
+op_maximum = ops.Maximum()
+op_assign = ops.Assign()
+op_assignadd = ops.AssignAdd()
 
 
 @_asgd_opt.register("Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor",
@@ -37,7 +27,7 @@ def _run_asgd_opt(lambd, alpha, t0, step, lr, param, grad, eta, mu, ax):
     if step == 1:
         op_assign(eta, lr)
     next_param = op_cast(param * (1. - lambd * eta) - eta * grad, param.dtype)
-    F.assign(param, next_param)
+    ops.assign(param, next_param)
 
     if mu != 1:
         op_assignadd(ax, op_cast((next_param - ax) * mu, ax.dtype))
@@ -121,8 +111,8 @@ class ASGD(Optimizer):
         self.ax = self.parameters.clone(prefix="ax", init='zeros')
         self.step_t = Parameter(Tensor(0, mstype.int32), "step_t")
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd = P.AssignAdd()
-        self.op_cast = P.Cast()
+        self.assignadd = ops.AssignAdd()
+        self.op_cast = ops.Cast()
 
     @jit(backend="ms_backend")
     def implementation(self, lambd, alpha, t0, lr, group_id, maximize, gradients, weight_decay):
@@ -130,13 +120,13 @@ class ASGD(Optimizer):
         start_id = self.group_start_id[group_id]
         end_id = self.group_start_id[group_id + 1]
         params = self.parameters[start_id: end_id]
-        grads = tuple([grad if not maximize else F.neg(grad) for grad in gradients[start_id: end_id]])
+        grads = tuple([grad if not maximize else ops.neg(grad) for grad in gradients[start_id: end_id]])
         grads = self._decay_weight(weight_decay, params, grads)
 
         ax = self.ax[start_id: end_id]
         eta = self.eta[start_id: end_id]
         mu = self.mu[start_id: end_id]
-        self.hyper_map(F.partial(_asgd_opt, lambd, alpha, t0, self.step_t, lr),
+        self.hyper_map(ops.partial(_asgd_opt, lambd, alpha, t0, self.step_t, lr),
                        params, grads, eta, mu, ax)
         return True
 
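In _run_asgd_opt above, each parameter keeps an averaged copy ax that trails the SGD iterates: after the decayed gradient step, the hunk updates it with ax += (next_param - ax) * mu whenever mu != 1. A plain-Python sketch of that recursion with scalar stand-ins for the tensors (illustrative only; the mu == 1 branch is not shown in the hunk and is omitted here):

def asgd_average_step(param, grad, ax, eta, mu, lambd):
    # decayed SGD step from the hunk: next_param = param * (1 - lambd * eta) - eta * grad
    next_param = param * (1.0 - lambd * eta) - eta * grad
    # running average of the iterates: ax += (next_param - ax) * mu
    if mu != 1:
        ax += (next_param - ax) * mu
    return next_param, ax

p, ax = 1.0, 0.0
for _ in range(3):
    p, ax = asgd_average_step(p, grad=0.1, ax=ax, eta=0.01, mu=0.5, lambd=1e-4)
print(p, ax)  # the averaged copy ax lags behind the raw iterate p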
mindspore/experimental/optim/lr_scheduler.py
@@ -1,16 +1,6 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/lr_scheduler.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """LRScheduler."""
 from collections import Counter
@@ -20,8 +10,6 @@ from mindspore import ops, Tensor, Parameter
 from mindspore.experimental.optim.optimizer import Optimizer
 from mindspore.common.api import jit_class
 import mindspore.common.dtype as mstype
-from mindspore.ops import functional as F
-from mindspore.ops import operations as P
 from mindspore import _checkparam as Validator
 
 __all__ = ['StepLR', 'LinearLR', 'LRScheduler', 'ExponentialLR', 'PolynomialLR',
@@ -143,9 +131,12 @@ class LRScheduler:
 
 @jit_class
 class StepLR(LRScheduler):
-    """Decays the learning rate of each parameter group by gamma every
-    step_size epochs. Notice that such decay can happen simultaneously with
-    other changes to the learning rate from outside this scheduler.
+    """
+    During training, when calling `StepLR.step()` , if the current epoch number is an integer multiple of `step_size` ,
+    the learning rate will be decayed by multiplying it with `gamma` . The adjustment of the learning rate and
+    the parameter update of the optimizer are synergistically performed. The optimizer executes parameter optimization
+    operations based on the currently adjusted learning rate. The learning rate decay of StepLR may occur simultaneously
+    with external changes to the learning rate.
 
     .. warning::
         This is an experimental lr scheduler module that is subject to change.
@@ -431,8 +422,8 @@ class PolynomialLR(LRScheduler):
             raise TypeError(f"For 'PolynomialLR', the type of total_iters must be int, but got {type(total_iters)}.")
         self.total_iters = total_iters
         self.power = power
-        self.min = P.Minimum()
-        self.cast = P.Cast()
+        self.min = ops.Minimum()
+        self.cast = ops.Cast()
         super(PolynomialLR, self).__init__(optimizer, last_epoch)
 
     def get_lr(self):
@@ -804,7 +795,7 @@ class SequentialLR:
 
 @jit_class
 class ReduceLROnPlateau:
-    """
+    r"""
    Reduce learning rate when a metric has stopped improving.
    Models often benefit from reducing the learning rate by a factor
    of 2-10 once learning stagnates. The scheduler reads the metrics `metrics` during execution
@@ -886,7 +877,7 @@ class ReduceLROnPlateau:
        [Tensor(shape=[], dtype=Float32, value= 0.001)]
        [Tensor(shape=[], dtype=Float32, value= 0.001)]
        [Tensor(shape=[], dtype=Float32, value= 0.0001)]
-    """
+    """
 
    def __init__(self, optimizer, mode='min', factor=0.1, patience=10,
                 threshold=1e-4, threshold_mode='rel', cooldown=0,
@@ -915,8 +906,8 @@ class ReduceLROnPlateau:
        self.cooldown_counter = 0
        self.eps = eps
        self.mode_worse = None
-        self.assign = P.Assign()
-        self.cast = P.Cast()
+        self.assign = ops.Assign()
+        self.cast = ops.Cast()
        self.last_epoch = Parameter(Tensor(0, dtype=mstype.int32),
                                    name='last_epoch_' + self.__class__.__name__)
 
@@ -1079,17 +1070,8 @@ class CyclicLR(LRScheduler):
        [Tensor(shape=[], dtype=Float32, value= 0.010225)]
    """
 
-    def __init__(self,
-                 optimizer,
-                 base_lr,
-                 max_lr,
-                 step_size_up=2000,
-                 step_size_down=None,
-                 mode='triangular',
-                 gamma=1.,
-                 scale_fn=None,
-                 scale_mode='cycle',
-                 last_epoch=-1):
+    def __init__(self, optimizer, base_lr, max_lr, step_size_up=2000, step_size_down=None, mode='triangular',
+                 gamma=1.0, scale_fn=None, scale_mode='cycle', last_epoch=-1):
 
        base_lrs = self._preprocess_input_param(optimizer, base_lr, 'base_lr')
 
@@ -1117,7 +1099,7 @@ class CyclicLR(LRScheduler):
        self._scale_fn_custom = scale_fn
        self.scale_mode = scale_mode
        self._init_scale_fn()
-        self.floor = P.Floor()
+        self.floor = ops.Floor()
 
        super(CyclicLR, self).__init__(optimizer, last_epoch)
        self.base_lrs = [Tensor(lr) for lr in base_lrs]
@@ -1252,12 +1234,12 @@ class CosineAnnealingWarmRestarts(LRScheduler):
        self.zero_tensor = Tensor(0, mstype.int32)
 
        self.math_pi = math.pi
-        self.cos = P.Cos()
-        self.cast = P.Cast()
-        self.log = P.Log()
-        self.cast = P.Cast()
-        self.assign = P.Assign()
-        self.floor = P.Floor()
+        self.cos = ops.Cos()
+        self.cast = ops.Cast()
+        self.log = ops.Log()
+        self.cast = ops.Cast()
+        self.assign = ops.Assign()
+        self.floor = ops.Floor()
        self._last_lr = [group["lr"] for group in optimizer.param_groups]
        super(CosineAnnealingWarmRestarts, self).__init__(optimizer, last_epoch)
 
@@ -1306,7 +1288,7 @@ class CosineAnnealingWarmRestarts(LRScheduler):
 
        for i, data in enumerate(zip(self.optimizer.param_groups, self.get_lr())):
            _, lr = data
-            F.assign(self.optimizer.param_groups[i]["lr"], lr)
+            ops.assign(self.optimizer.param_groups[i]["lr"], lr)
 
 
 @jit_class
@@ -1371,8 +1353,8 @@ class CosineAnnealingLR(LRScheduler):
        self.T_max = T_max
        self.eta_min = eta_min
        self.math_pi = math.pi
-        self.cos = P.Cos()
-        self.cast = P.Cast()
+        self.cos = ops.Cos()
+        self.cast = ops.Cast()
        super(CosineAnnealingLR, self).__init__(optimizer, last_epoch)
 
    def get_lr(self):
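The rewritten StepLR docstring above describes a stepwise decay: each time the epoch counter reaches a multiple of step_size, the learning rate is multiplied by gamma. A quick arithmetic sketch of the resulting schedule (illustrative values only):

# lr(epoch) = initial_lr * gamma ** (epoch // step_size)
initial_lr, gamma, step_size = 0.1, 0.5, 2
for epoch in range(6):
    print(epoch, initial_lr * gamma ** (epoch // step_size))
# 0 0.1, 1 0.1, 2 0.05, 3 0.05, 4 0.025, 5 0.025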
mindspore/experimental/optim/nadam.py
@@ -1,30 +1,20 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/nadam.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """nadam"""
 from __future__ import absolute_import
 
-from mindspore.ops import functional as F, composite as C, operations as P
+from mindspore import ops
 from mindspore.common import Parameter, Tensor
 import mindspore.common.dtype as mstype
 from mindspore import _checkparam as validator
 from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_than, check_not_less_than_without_equal
 from mindspore import jit
 
-_nadam_opt = C.MultitypeFuncGraph("nadam_opt")
+_nadam_opt = ops.MultitypeFuncGraph("nadam_opt")
 
-op_sqrt = P.Sqrt()
+op_sqrt = ops.Sqrt()
 
 
 @_nadam_opt.register("Number", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor",
@@ -34,15 +24,15 @@ def _tensor_run_opt(beta1, beta2, momentum_decay, eps, step_t, lr, param, grad,
     bias_correction2 = 1 - beta2 ** step_t
     mu = beta1 * (1. - 0.5 * (0.96 ** (step_t * momentum_decay)))
     mu_next = beta1 * (1. - 0.5 * (0.96 ** ((step_t + 1) * momentum_decay)))
-    F.assign(mu_product, mu_product * mu)
-    F.assign(exp_avg, exp_avg * beta1 + grad * (1 - beta1))
-    F.assign(exp_avg_sq, exp_avg_sq * beta2 + grad * grad * (1 - beta2))
+    ops.assign(mu_product, mu_product * mu)
+    ops.assign(exp_avg, exp_avg * beta1 + grad * (1 - beta1))
+    ops.assign(exp_avg_sq, exp_avg_sq * beta2 + grad * grad * (1 - beta2))
 
     denom = op_sqrt(exp_avg_sq / bias_correction2) + eps
 
     mu_product_next = mu_product * mu_next
-    F.assign(param, param - lr * (1. - mu) / (1. - mu_product) * grad / denom)
-    F.assign(param, param - (lr * mu_next) / (1. - mu_product_next) * exp_avg / denom)
+    ops.assign(param, param - lr * (1. - mu) / (1. - mu_product) * grad / denom)
+    ops.assign(param, param - (lr * mu_next) / (1. - mu_product_next) * exp_avg / denom)
 
     return True
 
@@ -122,8 +112,8 @@ class NAdam(Optimizer):
         self.mu_product = [Parameter(Tensor(1.), "mu_product_" + param.name) for param in self.parameters]
 
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd = P.AssignAdd()
-        self.op_cast = P.Cast()
+        self.assignadd = ops.AssignAdd()
+        self.op_cast = ops.Cast()
 
     @jit
     def implementation(self, lr, beta1, beta2, weight_decay, momentum_decay, eps, start_id, end_id, gradients):
@@ -135,7 +125,7 @@ class NAdam(Optimizer):
         exp_avg_sq = self.exp_avg_sq[start_id: end_id]
         mu_product = self.mu_product[start_id: end_id]
 
-        self.hyper_map(F.partial(_nadam_opt, beta1, beta2, momentum_decay, eps, self.step_t, lr),
+        self.hyper_map(ops.partial(_nadam_opt, beta1, beta2, momentum_decay, eps, self.step_t, lr),
                        params, grads, exp_avg, exp_avg_sq, mu_product)
         return True
 
mindspore/experimental/optim/optimizer.py
@@ -1,22 +1,12 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/optimizer.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """optimizer"""
 from __future__ import absolute_import
 from collections import defaultdict
 from typing import Iterable
-from mindspore.ops import functional as F, composite as C, operations as P
+from mindspore import ops
 
 from mindspore.nn.cell import Cell
 from mindspore.common.parameter import Parameter, ParameterTuple
@@ -98,7 +88,7 @@ class Optimizer(Cell):
         self.param_groups = []
         self.parameters = []
         self.lrs = []
-        self.map_ = C.Map()
+        self.map_ = ops.Map()
         self.group_start_id = [0]
         if not isinstance(param_groups[0], dict):
             param_groups = [{'params': param_groups}]
@@ -106,7 +96,7 @@ class Optimizer(Cell):
         for param_group in param_groups:
             self.add_param_group(param_group)
         self.parameters = ParameterTuple(self.parameters)
-        self.hyper_map = C.HyperMap()
+        self.hyper_map = ops.HyperMap()
         self.enable_tuple_broaden = True
 
     def __repr__(self):
@@ -167,7 +157,7 @@ class Optimizer(Cell):
         """Apply weight decay."""
         if weight_decay != 0.:
             weight_decay = Tensor(weight_decay, mstype.float32)
-            gradients = self.map_(F.partial(_apply_decay, weight_decay), params, gradients)
+            gradients = self.map_(ops.partial(_apply_decay, weight_decay), params, gradients)
         return gradients
 
     def _preprocess_param_group(self, param_group):
@@ -228,18 +218,18 @@ class Optimizer(Cell):
     def construct(self, *hyper_params):
         raise NotImplementedError
 
-op_add = P.AddN()
-op_gather = P.Gather()
-op_mul = P.Mul()
+op_add = ops.AddN()
+op_gather = ops.Gather()
+op_mul = ops.Mul()
 
-_apply_decay = C.MultitypeFuncGraph("apply_decay")
+_apply_decay = ops.MultitypeFuncGraph("apply_decay")
 
 
 @_apply_decay.register("Tensor", "Tensor", "RowTensor")
 def _tensor_apply_decay_with_sparse(weight_decay, weight, gradient):
     """Get grad with weight_decay."""
     indices = gradient.indices
-    values = op_add((op_gather(weight, indices, 0) * F.cast(weight_decay, F.dtype(weight)), gradient.values))
+    values = op_add((op_gather(weight, indices, 0) * ops.cast(weight_decay, ops.dtype(weight)), gradient.values))
     shape = gradient.dense_shape
     return RowTensorInner(indices, values, shape)
 
@@ -247,7 +237,7 @@ def _tensor_apply_decay_with_sparse(weight_decay, weight, gradient):
 @_apply_decay.register("Tensor", "Tensor", "Tensor")
 def _tensor_apply_decay(weight_decay, weight, gradient):
     """Get grad with weight_decay."""
-    return op_add((op_mul(weight, F.cast(weight_decay, F.dtype(weight))), gradient))
+    return op_add((op_mul(weight, ops.cast(weight_decay, ops.dtype(weight))), gradient))
 
 
 def check_not_less_than(arg_value, arg_name, prim, value=0.0):
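Note the two decay styles visible in these files: _tensor_apply_decay in optimizer.py folds weight * weight_decay into the gradient (the L2-style decay applied by Optimizer._decay_weight), while _run_adamw_opt in adamw.py scales the parameter itself before the Adam step (decoupled decay). A scalar sketch of the difference; weight_decay_new comes from prepare_func, which is not shown in these hunks, so its 1 - lr * weight_decay form below is an assumption based on the decoupled formulation:

weight, grad, weight_decay, lr = 0.8, 0.05, 0.01, 0.1

# optimizer.py, _tensor_apply_decay: decay folded into the gradient
grad_with_decay = grad + weight * weight_decay   # 0.058

# adamw.py, _run_adamw_opt: the parameter is scaled directly (decoupled decay)
weight_decay_new = 1 - lr * weight_decay         # assumed form of the prepare_func output
next_param = weight * weight_decay_new           # 0.7992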
mindspore/experimental/optim/radam.py
@@ -1,32 +1,22 @@
-# Copyright 2023 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# The code implementation refers to the following files from pytorch:
+# - https://github.com/pytorch/pytorch/blob/v1.13.0/torch/optim/radam.py
+# Additional modifications are made by Huawei Technologies Co., Ltd in 2023.
 # ============================================================================
 """radam"""
 from __future__ import absolute_import
 
-from mindspore.ops import functional as F, composite as C, operations as P
+from mindspore import ops
 from mindspore.common import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore import _checkparam as validator
 from mindspore.experimental.optim.optimizer import Optimizer, check_not_less_than, check_not_less_than_without_equal
 from mindspore import jit
 
-_radam_opt = C.MultitypeFuncGraph("radam_opt")
+_radam_opt = ops.MultitypeFuncGraph("radam_opt")
 
-op_pow = P.Pow()
-op_sqrt = P.Sqrt()
-op_cast = P.Cast()
+op_pow = ops.Pow()
+op_sqrt = ops.Sqrt()
+op_cast = ops.Cast()
 
 
 @_radam_opt.register("Number", "Number", "Number", "Tensor", "Number", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor",
@@ -35,17 +25,17 @@ def _tensor_run_opt(beta1, beta2, eps, lr, rho_inf, rho_t, bias_correction1, bia
                     exp_avg_sq):
     """Apply radam optimizer to the weight parameter."""
 
-    F.assign(exp_avg, exp_avg * beta1 + grad * (1 - beta1))
-    F.assign(exp_avg_sq, exp_avg_sq * beta2 + grad * grad * (1 - beta2))
+    ops.assign(exp_avg, exp_avg * beta1 + grad * (1 - beta1))
+    ops.assign(exp_avg_sq, exp_avg_sq * beta2 + grad * grad * (1 - beta2))
     bias_corrected_exp_avg = exp_avg / bias_correction1
 
     if rho_t > 5.0:
         rect = op_sqrt((rho_t - 4) * (rho_t - 2) * rho_inf / ((rho_inf - 4) * (rho_inf - 2) * rho_t))
         exp_avg_sq_sqrt = op_sqrt(exp_avg_sq) + eps
         adaptive_lr = op_sqrt(bias_correction2) / exp_avg_sq_sqrt
-        F.assign(param, param - bias_corrected_exp_avg * lr * adaptive_lr * rect)
+        ops.assign(param, param - bias_corrected_exp_avg * lr * adaptive_lr * rect)
     else:
-        F.assign(param, param - bias_corrected_exp_avg * lr)
+        ops.assign(param, param - bias_corrected_exp_avg * lr)
 
     return True
 
@@ -89,6 +79,9 @@ class RAdam(Optimizer):
         &\rule{180mm}{0.4pt}
         \end{align*}
 
+    For more details about RAdam algorithm, please refer to `On the Variance of the Adaptive Learning Rate and Beyond
+    <https://arxiv.org/abs/1908.03265>`_.
+
     .. warning::
         This is an experimental optimizer API that is subject to change.
         This module must be used with lr scheduler module in `LRScheduler Class
@@ -155,7 +148,7 @@ class RAdam(Optimizer):
         self.exp_avg = self.parameters.clone(prefix="exp_avg", init='zeros')
         self.exp_avg_sq = self.parameters.clone(prefix="exp_avg_sq", init='zeros')
         self.increase_tensor = Tensor(1, mstype.int32)
-        self.assignadd = P.AssignAdd()
+        self.assignadd = ops.AssignAdd()
 
     @jit(backend="ms_backend")
     def implementation(self, lr, beta1, beta2, weight_decay, eps, start_id, end_id, gradients):
@@ -175,7 +168,8 @@ class RAdam(Optimizer):
 
         rho_t = rho_inf - right
 
-        self.hyper_map(F.partial(_radam_opt, beta1, beta2, eps, lr, rho_inf, rho_t, bias_correction1, bias_correction2),
+        self.hyper_map(ops.partial(_radam_opt, beta1, beta2, eps, lr, rho_inf,
+                                   rho_t, bias_correction1, bias_correction2),
                        params, grads, exp_avg, exp_avg_sq)
         return True
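The docstrings above note that these experimental optimizers must be paired with an LRScheduler. A minimal end-to-end usage sketch, assuming MindSpore 2.7.0 with the mindspore.experimental.optim API (network, data, and hyperparameters are arbitrary placeholders):

import numpy as np
import mindspore as ms
from mindspore import nn
from mindspore.experimental import optim

net = nn.Dense(4, 2)
loss_fn = nn.MAELoss()
optimizer = optim.AdamW(net.trainable_params(), lr=1e-3, weight_decay=1e-2)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

def forward_fn(data, label):
    return loss_fn(net(data), label)

# differentiate the forward function w.r.t. the optimizer's parameters
grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters)

def train_step(data, label):
    loss, grads = grad_fn(data, label)
    optimizer(grads)   # experimental optimizers are Cells called with the gradients
    return loss

data = ms.Tensor(np.random.rand(8, 4).astype(np.float32))
label = ms.Tensor(np.random.rand(8, 2).astype(np.float32))
print(train_step(data, label))
scheduler.step()       # advance the StepLR schedule, typically once per epoch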