mindspore 2.6.0rc1__cp39-cp39-win_amd64.whl → 2.7.0rc1__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. More details were linked from the original page; the link target is not preserved in this text export.

Files changed (384)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +1 -1
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +40 -9
  7. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  8. mindspore/_extends/optimize/cell_utils.py +96 -0
  9. mindspore/_extends/parse/__init__.py +2 -2
  10. mindspore/_extends/parse/compile_config.py +44 -22
  11. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
  12. mindspore/_extends/parse/parser.py +37 -62
  13. mindspore/_extends/parse/resources.py +39 -0
  14. mindspore/_extends/parse/standard_method.py +43 -13
  15. mindspore/_extends/parse/trope.py +8 -1
  16. mindspore/_extends/pijit/__init__.py +1 -2
  17. mindspore/amp.py +4 -4
  18. mindspore/avcodec-59.dll +0 -0
  19. mindspore/avdevice-59.dll +0 -0
  20. mindspore/avfilter-8.dll +0 -0
  21. mindspore/avformat-59.dll +0 -0
  22. mindspore/avutil-57.dll +0 -0
  23. mindspore/boost/adasum.py +1 -1
  24. mindspore/boost/boost_cell_wrapper.py +4 -4
  25. mindspore/common/__init__.py +27 -2
  26. mindspore/common/_grad_function.py +2 -1
  27. mindspore/common/_pijit_context.py +28 -7
  28. mindspore/common/_stub_tensor.py +1 -209
  29. mindspore/common/_tensor_cpp_method.py +1 -1
  30. mindspore/common/_tensor_docs.py +77 -16
  31. mindspore/common/api.py +238 -113
  32. mindspore/common/dtype.py +21 -11
  33. mindspore/common/dump.py +10 -15
  34. mindspore/common/generator.py +5 -3
  35. mindspore/common/hook_handle.py +11 -2
  36. mindspore/common/jit_config.py +1 -1
  37. mindspore/common/jit_trace.py +84 -105
  38. mindspore/common/parameter.py +26 -12
  39. mindspore/common/recompute.py +3 -3
  40. mindspore/common/sparse_tensor.py +0 -3
  41. mindspore/common/symbol.py +0 -1
  42. mindspore/common/tensor.py +81 -81
  43. mindspore/communication/_comm_helper.py +46 -4
  44. mindspore/communication/management.py +79 -7
  45. mindspore/context.py +58 -40
  46. mindspore/dataset/core/config.py +3 -3
  47. mindspore/dataset/engine/datasets.py +20 -7
  48. mindspore/dataset/engine/datasets_user_defined.py +33 -3
  49. mindspore/dataset/engine/iterators.py +2 -2
  50. mindspore/dataset/engine/obs/config_loader.py +2 -2
  51. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  52. mindspore/dataset/transforms/py_transforms.py +7 -3
  53. mindspore/dataset/transforms/transforms.py +7 -3
  54. mindspore/dataset/vision/validators.py +1 -0
  55. mindspore/device_context/ascend/device.py +1 -1
  56. mindspore/device_context/gpu/__init__.py +2 -2
  57. mindspore/device_context/gpu/device.py +1 -1
  58. mindspore/device_context/gpu/op_precision.py +4 -2
  59. mindspore/device_context/gpu/op_tuning.py +6 -3
  60. mindspore/device_manager.py +16 -9
  61. mindspore/dnnl.dll +0 -0
  62. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
  63. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  64. mindspore/experimental/optim/adadelta.py +13 -20
  65. mindspore/experimental/optim/adagrad.py +15 -22
  66. mindspore/experimental/optim/adam.py +17 -24
  67. mindspore/experimental/optim/adamax.py +14 -22
  68. mindspore/experimental/optim/adamw.py +28 -34
  69. mindspore/experimental/optim/asgd.py +15 -25
  70. mindspore/experimental/optim/lr_scheduler.py +27 -45
  71. mindspore/experimental/optim/nadam.py +14 -24
  72. mindspore/experimental/optim/optimizer.py +13 -23
  73. mindspore/experimental/optim/radam.py +18 -24
  74. mindspore/experimental/optim/rmsprop.py +14 -25
  75. mindspore/experimental/optim/rprop.py +15 -26
  76. mindspore/experimental/optim/sgd.py +9 -19
  77. mindspore/hal/__init__.py +4 -4
  78. mindspore/hal/contiguous_tensors_handle.py +2 -2
  79. mindspore/hal/memory.py +27 -7
  80. mindspore/include/api/cell.h +37 -1
  81. mindspore/include/api/delegate.h +10 -0
  82. mindspore/include/api/model.h +3 -0
  83. mindspore/include/api/types.h +2 -2
  84. mindspore/include/c_api/model_c.h +0 -58
  85. mindspore/include/c_api/tensor_c.h +0 -26
  86. mindspore/include/dataset/vision_ascend.h +1 -1
  87. mindspore/jpeg62.dll +0 -0
  88. mindspore/mindrecord/tools/cifar10.py +60 -11
  89. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  90. mindspore/mindspore_backend_common.dll +0 -0
  91. mindspore/mindspore_backend_manager.dll +0 -0
  92. mindspore/mindspore_common.dll +0 -0
  93. mindspore/mindspore_core.dll +0 -0
  94. mindspore/mindspore_cpu_res_manager.dll +0 -0
  95. mindspore/mindspore_dump.dll +0 -0
  96. mindspore/mindspore_frontend.dll +0 -0
  97. mindspore/mindspore_glog.dll +0 -0
  98. mindspore/mindspore_memory_pool.dll +0 -0
  99. mindspore/mindspore_ms_backend.dll +0 -0
  100. mindspore/mindspore_ops.dll +0 -0
  101. mindspore/mindspore_ops_host.dll +0 -0
  102. mindspore/mindspore_ops_kernel_common.dll +0 -0
  103. mindspore/mindspore_profiler.dll +0 -0
  104. mindspore/mindspore_pyboost.dll +0 -0
  105. mindspore/mindspore_pynative.dll +0 -0
  106. mindspore/mindspore_res_manager.dll +0 -0
  107. mindspore/mindspore_runtime_pipeline.dll +0 -0
  108. mindspore/mint/__init__.py +6 -46
  109. mindspore/mint/distributed/__init__.py +1 -0
  110. mindspore/mint/distributed/distributed.py +212 -9
  111. mindspore/mint/nn/__init__.py +1 -1
  112. mindspore/mint/nn/functional.py +53 -6
  113. mindspore/mint/nn/layer/_functions.py +164 -294
  114. mindspore/mint/nn/layer/activation.py +8 -6
  115. mindspore/mint/nn/layer/conv.py +137 -101
  116. mindspore/mint/nn/layer/normalization.py +8 -22
  117. mindspore/mint/optim/adam.py +19 -18
  118. mindspore/mint/optim/adamw.py +14 -8
  119. mindspore/mint/optim/sgd.py +5 -5
  120. mindspore/nn/cell.py +328 -502
  121. mindspore/nn/grad/cell_grad.py +11 -12
  122. mindspore/nn/layer/activation.py +32 -34
  123. mindspore/nn/layer/basic.py +67 -64
  124. mindspore/nn/layer/channel_shuffle.py +4 -4
  125. mindspore/nn/layer/combined.py +4 -2
  126. mindspore/nn/layer/conv.py +117 -110
  127. mindspore/nn/layer/dense.py +9 -7
  128. mindspore/nn/layer/embedding.py +50 -52
  129. mindspore/nn/layer/image.py +37 -39
  130. mindspore/nn/layer/math.py +111 -112
  131. mindspore/nn/layer/normalization.py +56 -44
  132. mindspore/nn/layer/pooling.py +58 -63
  133. mindspore/nn/layer/rnn_cells.py +33 -33
  134. mindspore/nn/layer/rnns.py +56 -56
  135. mindspore/nn/layer/thor_layer.py +74 -73
  136. mindspore/nn/layer/transformer.py +11 -1
  137. mindspore/nn/learning_rate_schedule.py +20 -20
  138. mindspore/nn/loss/loss.py +79 -81
  139. mindspore/nn/optim/adam.py +3 -3
  140. mindspore/nn/optim/adasum.py +2 -2
  141. mindspore/nn/optim/asgd.py +2 -0
  142. mindspore/nn/optim/optimizer.py +1 -1
  143. mindspore/nn/optim/thor.py +2 -2
  144. mindspore/nn/probability/distribution/exponential.py +2 -1
  145. mindspore/nn/probability/distribution/poisson.py +2 -1
  146. mindspore/nn/sparse/sparse.py +3 -3
  147. mindspore/nn/wrap/cell_wrapper.py +34 -37
  148. mindspore/nn/wrap/grad_reducer.py +37 -37
  149. mindspore/nn/wrap/loss_scale.py +72 -74
  150. mindspore/numpy/array_creations.py +5 -5
  151. mindspore/numpy/fft.py +1 -1
  152. mindspore/numpy/math_ops.py +5 -5
  153. mindspore/opencv_core452.dll +0 -0
  154. mindspore/opencv_imgcodecs452.dll +0 -0
  155. mindspore/opencv_imgproc452.dll +0 -0
  156. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  157. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  158. mindspore/ops/_vmap/vmap_array_ops.py +31 -13
  159. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  160. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
  161. mindspore/ops/auto_generate/gen_extend_func.py +23 -141
  162. mindspore/ops/auto_generate/gen_ops_def.py +727 -321
  163. mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
  164. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  165. mindspore/ops/composite/__init__.py +10 -0
  166. mindspore/ops/composite/base.py +8 -4
  167. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  168. mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
  169. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  170. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  171. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  172. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  173. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  174. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  175. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  176. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  177. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  178. mindspore/ops/function/__init__.py +3 -1
  179. mindspore/ops/function/_add_attr_func.py +11 -6
  180. mindspore/ops/function/array_func.py +9 -96
  181. mindspore/ops/function/debug_func.py +4 -3
  182. mindspore/ops/function/grad/grad_func.py +1 -1
  183. mindspore/ops/function/math_func.py +33 -540
  184. mindspore/ops/function/nn_func.py +28 -74
  185. mindspore/ops/function/other_func.py +4 -1
  186. mindspore/ops/function/random_func.py +44 -5
  187. mindspore/ops/function/vmap_func.py +2 -1
  188. mindspore/ops/functional.py +2 -3
  189. mindspore/ops/functional_overload.py +571 -6
  190. mindspore/ops/op_info_register.py +21 -0
  191. mindspore/ops/operations/__init__.py +16 -11
  192. mindspore/ops/operations/_custom_ops_utils.py +689 -34
  193. mindspore/ops/operations/_inner_ops.py +3 -6
  194. mindspore/ops/operations/_sequence_ops.py +1 -1
  195. mindspore/ops/operations/array_ops.py +2 -2
  196. mindspore/ops/operations/comm_ops.py +185 -26
  197. mindspore/ops/operations/custom_ops.py +294 -174
  198. mindspore/ops/operations/debug_ops.py +59 -4
  199. mindspore/ops/operations/image_ops.py +13 -13
  200. mindspore/ops/operations/manually_defined/ops_def.py +15 -16
  201. mindspore/ops/operations/math_ops.py +3 -4
  202. mindspore/ops/operations/nn_ops.py +7 -39
  203. mindspore/ops/primitive.py +6 -10
  204. mindspore/ops/tensor_method.py +47 -8
  205. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  206. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  207. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  208. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  209. mindspore/ops_generate/common/base_generator.py +14 -0
  210. mindspore/ops_generate/common/gen_constants.py +8 -3
  211. mindspore/ops_generate/common/gen_utils.py +0 -19
  212. mindspore/ops_generate/common/op_proto.py +11 -4
  213. mindspore/ops_generate/common/template.py +88 -11
  214. mindspore/ops_generate/gen_ops.py +1 -1
  215. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  216. mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
  217. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  218. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  219. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  220. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  221. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  222. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
  223. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  224. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  225. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  226. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  227. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  228. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  229. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  230. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  231. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  232. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  233. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  234. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  235. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  236. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  237. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  238. mindspore/parallel/_auto_parallel_context.py +11 -8
  239. mindspore/parallel/_cell_wrapper.py +113 -45
  240. mindspore/parallel/_parallel_serialization.py +1 -1
  241. mindspore/parallel/_ps_context.py +4 -6
  242. mindspore/parallel/_tensor.py +167 -12
  243. mindspore/parallel/_transformer/moe.py +1 -1
  244. mindspore/parallel/_transformer/transformer.py +13 -8
  245. mindspore/parallel/auto_parallel.py +14 -7
  246. mindspore/parallel/checkpoint_convert.py +3 -3
  247. mindspore/parallel/checkpoint_transform.py +11 -7
  248. mindspore/parallel/cluster/process_entity/_api.py +84 -48
  249. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  250. mindspore/parallel/cluster/run.py +43 -4
  251. mindspore/parallel/function/__init__.py +8 -1
  252. mindspore/parallel/function/reshard_func.py +6 -7
  253. mindspore/parallel/nn/__init__.py +15 -2
  254. mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
  255. mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
  256. mindspore/parallel/shard.py +3 -4
  257. mindspore/parallel/transform_safetensors.py +463 -174
  258. mindspore/profiler/__init__.py +2 -1
  259. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  260. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  261. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
  262. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  263. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  264. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  265. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  266. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  267. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  268. mindspore/profiler/analysis/task_manager.py +1 -1
  269. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  270. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  271. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
  272. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  273. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  274. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  275. mindspore/profiler/common/constant.py +16 -0
  276. mindspore/profiler/common/profiler_context.py +25 -27
  277. mindspore/profiler/common/profiler_info.py +0 -16
  278. mindspore/profiler/common/profiler_op_analyse.py +235 -0
  279. mindspore/profiler/common/profiler_output_path.py +23 -8
  280. mindspore/profiler/common/profiler_parameters.py +128 -35
  281. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  282. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  283. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  284. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  285. mindspore/profiler/dynamic_profiler.py +305 -314
  286. mindspore/profiler/envprofiler.py +12 -7
  287. mindspore/profiler/experimental_config.py +96 -6
  288. mindspore/profiler/mstx.py +33 -12
  289. mindspore/profiler/platform/__init__.py +2 -3
  290. mindspore/profiler/platform/npu_profiler.py +29 -19
  291. mindspore/profiler/profiler.py +35 -19
  292. mindspore/profiler/profiler_action_controller.py +64 -76
  293. mindspore/profiler/schedule.py +10 -4
  294. mindspore/rewrite/common/config.py +1 -0
  295. mindspore/rewrite/common/namer.py +1 -0
  296. mindspore/rewrite/common/namespace.py +1 -0
  297. mindspore/rewrite/node/node.py +31 -11
  298. mindspore/rewrite/parsers/assign_parser.py +1 -1
  299. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  300. mindspore/run_check/_check_version.py +7 -10
  301. mindspore/runtime/__init__.py +5 -5
  302. mindspore/runtime/event.py +10 -4
  303. mindspore/runtime/executor.py +60 -45
  304. mindspore/runtime/memory.py +30 -32
  305. mindspore/runtime/thread_bind_core.py +298 -164
  306. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  307. mindspore/swresample-4.dll +0 -0
  308. mindspore/swscale-6.dll +0 -0
  309. mindspore/tinyxml2.dll +0 -0
  310. mindspore/train/_utils.py +14 -4
  311. mindspore/train/amp.py +43 -20
  312. mindspore/train/callback/__init__.py +5 -5
  313. mindspore/train/callback/_checkpoint.py +3 -6
  314. mindspore/train/callback/_flops_collector.py +1 -1
  315. mindspore/train/callback/_landscape.py +0 -1
  316. mindspore/train/callback/_train_fault_tolerance.py +97 -16
  317. mindspore/train/data_sink.py +11 -2
  318. mindspore/train/dataset_helper.py +9 -0
  319. mindspore/train/model.py +135 -55
  320. mindspore/train/serialization.py +133 -111
  321. mindspore/train/summary/summary_record.py +13 -2
  322. mindspore/turbojpeg.dll +0 -0
  323. mindspore/utils/__init__.py +3 -2
  324. mindspore/utils/dryrun.py +0 -6
  325. mindspore/utils/runtime_execution_order_check.py +163 -77
  326. mindspore/utils/sdc_detect.py +68 -0
  327. mindspore/utils/utils.py +6 -9
  328. mindspore/version.py +1 -1
  329. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
  330. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +333 -371
  331. mindspore/_deprecated/jit.py +0 -198
  332. mindspore/experimental/es/__init__.py +0 -22
  333. mindspore/experimental/es/embedding_service.py +0 -891
  334. mindspore/experimental/es/embedding_service_layer.py +0 -581
  335. mindspore/profiler/parser/__init__.py +0 -14
  336. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  337. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  338. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  339. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  340. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  341. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  342. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  343. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  344. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  345. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  346. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  347. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  348. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  349. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  350. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  351. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  352. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  353. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  354. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  355. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  356. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  357. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  358. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  359. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  360. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  361. mindspore/profiler/parser/container.py +0 -229
  362. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  363. mindspore/profiler/parser/flops_parser.py +0 -531
  364. mindspore/profiler/parser/framework_enum.py +0 -111
  365. mindspore/profiler/parser/framework_parser.py +0 -464
  366. mindspore/profiler/parser/framework_struct.py +0 -61
  367. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  368. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  369. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  370. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  371. mindspore/profiler/parser/hccl_parser.py +0 -573
  372. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  373. mindspore/profiler/parser/integrator.py +0 -526
  374. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  375. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  376. mindspore/profiler/parser/minddata_parser.py +0 -186
  377. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  378. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  379. mindspore/profiler/parser/optime_parser.py +0 -250
  380. mindspore/profiler/parser/profiler_info.py +0 -213
  381. mindspore/profiler/parser/step_trace_parser.py +0 -666
  382. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
  383. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  384. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/train/model.py CHANGED
@@ -57,8 +57,10 @@ from mindspore.dataset.engine.datasets import _set_training_dataset, _reset_trai
57
57
  from mindspore.train import amp
58
58
  from mindspore._c_expression import _framework_profiler_step_start, _framework_profiler_step_end
59
59
  from mindspore._c_expression import _get_optimzer_timestamps
60
+ from mindspore._c_expression import clean_tdt_channel, _clean_rootinfo
60
61
 
61
62
  from mindspore.parallel._utils import _init_auto_parallel_context, _clear_auto_parallel_context
63
+ from .serialization import load_param_into_net
62
64
 
63
65
  def _transfer_tensor_to_tuple(inputs):
64
66
  """
@@ -130,7 +132,8 @@ def _handle_exception_info(obj, uce_env, tft, e):
130
132
  if not uce_env:
131
133
  logger.error("uce wrapper caught RuntimeError but uce not enable, enter MindIO TTP process.",
132
134
  exc_info=True)
133
- tft.tft_report_error(tft.ReportState.RS_UNKNOWN.value)
135
+ if tft:
136
+ tft.tft_report_error(tft.ReportState.RS_UNKNOWN.value)
134
137
  raise e
135
138
  e_str = str(e)
136
139
  logger.warning("uce wrapper caught RuntimeError e_str:{}".format(e_str))
@@ -151,6 +154,13 @@ def _handle_exception_info(obj, uce_env, tft, e):
151
154
  tft.tft_report_error(tft.ReportState.RS_UNKNOWN.value)
152
155
  raise e
153
156
  tft.tft_report_error(tft.ReportState.RS_UCE.value)
157
+ elif "HCCEError" in e_str:
158
+ logger.warning("uce wrapper caught HCCEError")
159
+ if obj.stop_been_called:
160
+ logger.warning("Received HCCEError after force stop been called, so report force stopped error to MindIO.")
161
+ tft.tft_report_error(tft.ReportState.RS_NORMAL.value)
162
+ else:
163
+ tft.tft_report_error(tft.ReportState.RS_HCCL_FAILED.value)
154
164
  elif "ForceStopError" in e_str:
155
165
  logger.warning("uce wrapper caught RuntimeError ForceStopError")
156
166
  force_stop_err = tft.ReportState.RS_NORMAL.value
@@ -165,6 +175,69 @@ def _handle_exception_info(obj, uce_env, tft, e):
165
175
  raise e
166
176
 
167
177
 
178
+ def _handle_training_result_error(model, tft_obj):
179
+ """
180
+ Handle training result error for resuming training.
181
+ """
182
+ ckpt_load_fn = tft_obj.ckpt_load_func
183
+ train_network = tft_obj.cb_params.train_network
184
+ logger.warning("Process training result error start.")
185
+ # 1. Clear tdt channel
186
+ logger.warning("Clean tdt channel.")
187
+ clean_tdt_channel()
188
+
189
+ # 2. Load checkpoint
190
+ logger.warning("Load checkpoint.")
191
+ new_param_dict, remove_redundancy = ckpt_load_fn()
192
+ param_not_load, ckpt_not_load = load_param_into_net(train_network, new_param_dict, True, remove_redundancy)
193
+ logger.warning(f"param_not_load: {param_not_load}")
194
+ logger.warning(f"ckpt_not_load: {ckpt_not_load}")
195
+ resume_epoch = new_param_dict.get('epoch_num')
196
+ resume_step = new_param_dict.get('step_num')
197
+ model._initial_step = int(resume_step.asnumpy())
198
+ logger.warning("Process training result error end.")
199
+ return (resume_epoch, resume_step)
200
+
201
+
202
+ def _calc_cb_initial_step(org_epoch, org_step, *args, **kwargs):
203
+ """calculate initial step for callback"""
204
+ train_dataset = args[1]
205
+ dataset_sink_mode = args[3] if len(args) > 3 else kwargs.get('dataset_sink_mode', True)
206
+ sink_size = args[4] if len(args) > 4 else kwargs.get('sink_size', -1)
207
+
208
+ cb_initial_step = 0
209
+ if dataset_sink_mode:
210
+ train_dataset.set_init_step(org_epoch)
211
+ dataset_size = train_dataset.get_dataset_size()
212
+ if sink_size != -1:
213
+ cb_initial_step = org_epoch * sink_size + org_step
214
+ else:
215
+ cb_initial_step = org_epoch * dataset_size + org_step
216
+ else:
217
+ train_dataset.set_init_step(org_step)
218
+ cb_initial_step = org_step
219
+ if hasattr(train_dataset, '_dataset_helper'):
220
+ dataset_helper = train_dataset._dataset_helper
221
+ _reset_training_dataset(cb_initial_step, dataset_helper.iter.dataset.get_dataset_size())
222
+ return cb_initial_step
223
+
224
+
225
+ def _update_ckpt_callback_info(resume_train_step, **kwargs):
226
+ """
227
+ Update checkpoint callback internal state
228
+ """
229
+ ckpt_obj = None
230
+ if kwargs.get('callbacks') and isinstance(kwargs.get('callbacks'), ModelCheckpoint):
231
+ ckpt_obj = kwargs.get('callbacks')
232
+ if kwargs.get('callbacks') and isinstance(kwargs.get('callbacks'), list):
233
+ for item in kwargs.get('callbacks'):
234
+ if isinstance(item, ModelCheckpoint):
235
+ ckpt_obj = item
236
+ if ckpt_obj is not None:
237
+ ckpt_obj._last_triggered_step = 0
238
+ ckpt_obj._append_step_num = resume_train_step
239
+
240
+
168
241
  def _handle_tft(func):
169
242
  """
170
243
  Decorator function, which starts uce handle process when an exception occurs during training.
@@ -180,42 +253,35 @@ def _handle_tft(func):
180
253
  if isinstance(item, TrainFaultTolerance):
181
254
  obj = item
182
255
  if obj:
183
- tft = obj.tft
184
256
  tft_env = os.getenv("MS_ENABLE_TFT", "")
185
- uce_env = "UCE:1" in tft_env or "ARF:1" in tft_env
257
+ uce_env = "UCE:1" in tft_env or "ARF:1" in tft_env or "HCCE:1" in tft_env
258
+ tre_env = "TRE:1" in tft_env
186
259
  while True:
187
260
  try:
188
261
  return func(self, *args, **kwargs)
189
262
  except RuntimeError as e:
190
- _handle_exception_info(obj, uce_env, tft, e)
191
- ret = tft.tft_wait_next_action()
192
- if ret == tft.Action.EXIT.value:
193
- raise e
194
- repair_step = tft.tft_get_repair_step()
195
- logger.warning(
196
- "uce wrapper caught repair finish REPAIR STEP: {} batch_num:{}".format(repair_step,
197
- self.batch_num))
263
+ if tre_env and 'TREError' in str(e):
264
+ _, resume_step = _handle_training_result_error(self, obj)
265
+ repair_step = int(resume_step.asnumpy())
266
+ _update_ckpt_callback_info(repair_step, **kwargs)
267
+ logger.warning(f'Resume training after TREError from step {repair_step}.')
268
+ else:
269
+ _handle_exception_info(obj, uce_env, obj.tft, e)
270
+ ret = obj.tft.tft_wait_next_action()
271
+ if ret == obj.tft.Action.EXIT.value:
272
+ raise e
273
+ obj.stop_been_called = False
274
+ repair_step = obj.tft.tft_get_repair_step()
275
+ logger.warning(
276
+ "uce wrapper caught repair finish REPAIR STEP: {} batch_num:{}".format(repair_step,
277
+ self.batch_num))
198
278
  initial_epoch = int(repair_step / self.batch_num)
199
279
  initial_step = repair_step % self.batch_num
200
280
  kwargs["initial_epoch"] = initial_epoch
201
-
202
- train_dataset = args[1]
203
- dataset_sink_mode = args[3] if len(args) > 3 else kwargs.get('dataset_sink_mode', True)
204
- sink_size = args[4] if len(args) > 4 else kwargs.get('sink_size', -1)
205
-
206
- cb_initial_step = 0
207
- if dataset_sink_mode:
208
- train_dataset.set_init_step(initial_epoch)
209
- dataset_size = train_dataset.get_dataset_size()
210
- if sink_size != -1:
211
- cb_initial_step = initial_epoch * sink_size + initial_step
212
- else:
213
- cb_initial_step = initial_epoch * dataset_size + initial_step
214
- else:
215
- train_dataset.set_init_step(initial_step)
216
- cb_initial_step = initial_step
217
-
218
- kwargs["initial_step"] = cb_initial_step
281
+ cb_initial_step = _calc_cb_initial_step(initial_epoch, initial_step, *args, **kwargs)
282
+ if not self.enable_tre:
283
+ kwargs["initial_step"] = cb_initial_step
284
+ self._initial_step = 0
219
285
  # reset all accu grads to zero
220
286
  obj._reset_acc_grads()
221
287
  logger.warning(
@@ -223,8 +289,9 @@ def _handle_tft(func):
223
289
  cb_initial_step))
224
290
  continue
225
291
  except BaseException as e:
226
- logger.error("uce wrapper caught BaseException error, enter MindIO TTP process.", exc_info=True)
227
- tft.tft_report_error(tft.ReportState.RS_UNKNOWN.value)
292
+ if obj.tft:
293
+ logger.error("uce wrapper caught BaseException error, enter MindIO TTP process.", exc_info=True)
294
+ obj.tft.tft_report_error(obj.tft.ReportState.RS_UNKNOWN.value)
228
295
  raise e
229
296
  else:
230
297
  return func(self, *args, **kwargs)
@@ -384,6 +451,11 @@ def _set_with_processed_inputs(network, inputs):
384
451
  "Reset inputs from a process inputs, should be a list/tuple or a dict, but got %s!" % str(inputs))
385
452
 
386
453
 
454
+ def _check_tft_reset_dataset():
455
+ env_tft = os.getenv("MS_ENABLE_TFT", "")
456
+ return any([v in env_tft for v in ["TRE:1", "UCE:1", "HCCE:1", "ARF:1"]])
457
+
458
+
387
459
  class Model:
388
460
  """
389
461
  High-Level API for training or inference.
@@ -501,6 +573,10 @@ class Model:
501
573
  self._lite_infer = True # if backend lite infer fails, set False
502
574
  self._mindspore_lite_model_group_id = id(self) & 0xFFFF
503
575
  self.batch_num = -1
576
+ self.enable_tre = "TRE:1" in os.getenv("MS_ENABLE_TFT", "")
577
+ self.enable_hcce = "HCCE:1" in os.getenv("MS_ENABLE_TFT", "")
578
+ self._initial_step = None
579
+ self._need_reset_data = _check_tft_reset_dataset()
504
580
  _clear_auto_parallel_context(self._network)
505
581
 
506
582
  def _check_for_graph_cell(self, kwargs):
@@ -700,7 +776,7 @@ class Model:
700
776
  logger.info("Begin to connect network with dataset.")
701
777
  network = connect_network_with_dataset(network, dataset_helper)
702
778
 
703
- if _get_recovery_context("enable_recovery") and is_train:
779
+ if (_get_recovery_context("enable_recovery") or self._need_reset_data) and is_train:
704
780
  _set_training_dataset(dataset_helper)
705
781
 
706
782
  network.set_train(is_train)
@@ -957,6 +1033,7 @@ class Model:
957
1033
  cb_params.latest_ckpt_file = None
958
1034
  cb_params.loss_scale_mananger = self._loss_scale_manager
959
1035
  cb_params.is_arf = _get_recovery_context("is_arf")
1036
+ cb_params.initial_step = self._initial_step
960
1037
 
961
1038
  # build callback list
962
1039
  with _CallbackManager(callbacks) as list_callback:
@@ -995,7 +1072,7 @@ class Model:
995
1072
  initial_epoch (int): Epoch at which to start train, it used for resuming a previous training run.
996
1073
  Default: 0.
997
1074
  """
998
- is_graph = (context.get_context("mode") == context.GRAPH_MODE)
1075
+ is_graph = context.get_context("mode") == context.GRAPH_MODE
999
1076
  dataset_size = train_dataset.get_dataset_size()
1000
1077
  if dataset_size % sink_size != 0:
1001
1078
  logger.info("In dataset_sink mode (dataset_size % sink_size) should equal to 0, "
@@ -1064,6 +1141,7 @@ class Model:
1064
1141
  if cb_params.is_arf:
1065
1142
  cb_params.is_arf = False
1066
1143
  _set_recovery_context(is_arf=False)
1144
+ _clean_rootinfo()
1067
1145
 
1068
1146
  # Embedding cache server only run one step.
1069
1147
  if is_embedding_cache_server:
@@ -1278,6 +1356,7 @@ class Model:
1278
1356
  if cb_params.is_arf:
1279
1357
  cb_params.is_arf = False
1280
1358
  _set_recovery_context(is_arf=False)
1359
+ _clean_rootinfo()
1281
1360
  # Embedding cache server only run one step.
1282
1361
  if is_embedding_cache_server:
1283
1362
  break
@@ -2241,6 +2320,7 @@ class Model:
2241
2320
 
2242
2321
  Examples:
2243
2322
  >>> import numpy as np
2323
+ >>> import mindspore as ms
2244
2324
  >>> import mindspore.nn as nn
2245
2325
  >>> from mindspore import Tensor
2246
2326
  >>> from mindspore.train import Model
@@ -2250,28 +2330,28 @@ class Model:
2250
2330
  >>> from mindspore.parallel.auto_parallel import AutoParallel
2251
2331
  >>>
2252
2332
  >>> class Net(nn.Cell):
2253
- >>> def __init__(self):
2254
- >>> super(Net, self).__init__()
2255
- >>> self.fc1 = nn.Dense(128, 768, activation='relu')
2256
- >>> self.fc2 = nn.Dense(128, 768, activation='relu')
2257
- >>> self.fc3 = nn.Dense(128, 768, activation='relu')
2258
- >>> self.fc4 = nn.Dense(768, 768, activation='relu')
2259
- >>> self.relu4 = nn.ReLU()
2260
- >>> self.relu5 = nn.ReLU()
2261
- >>> self.transpose = P.Transpose()
2262
- >>> self.matmul1 = P.MatMul()
2263
- >>> self.matmul2 = P.MatMul()
2264
- >>>
2265
- >>> def construct(self, x):
2266
- >>> q = self.fc1(x)
2267
- >>> k = self.fc2(x)
2268
- >>> v = self.fc3(x)
2269
- >>> k = self.transpose(k, (1, 0))
2270
- >>> c = self.relu4(self.matmul1(q, k))
2271
- >>> s = self.relu5(self.matmul2(c, v))
2272
- >>> s = self.fc4(s)
2273
- >>> return s
2274
- >>>
2333
+ ... def __init__(self):
2334
+ ... super(Net, self).__init__()
2335
+ ... self.fc1 = nn.Dense(128, 768, activation='relu')
2336
+ ... self.fc2 = nn.Dense(128, 768, activation='relu')
2337
+ ... self.fc3 = nn.Dense(128, 768, activation='relu')
2338
+ ... self.fc4 = nn.Dense(768, 768, activation='relu')
2339
+ ... self.relu4 = nn.ReLU()
2340
+ ... self.relu5 = nn.ReLU()
2341
+ ... self.transpose = P.Transpose()
2342
+ ... self.matmul1 = P.MatMul()
2343
+ ... self.matmul2 = P.MatMul()
2344
+ ...
2345
+ ... def construct(self, x):
2346
+ ... q = self.fc1(x)
2347
+ ... k = self.fc2(x)
2348
+ ... v = self.fc3(x)
2349
+ ... k = self.transpose(k, (1, 0))
2350
+ ... c = self.relu4(self.matmul1(q, k))
2351
+ ... s = self.relu5(self.matmul2(c, v))
2352
+ ... s = self.fc4(s)
2353
+ ... return s
2354
+ ...
2275
2355
  >>> ms.set_context(mode=ms.GRAPH_MODE)
2276
2356
  >>> init()
2277
2357
  >>> inputs = Tensor(np.ones([32, 128]).astype(np.float32))