mindspore 2.7.0rc1__cp310-cp310-win_amd64.whl → 2.7.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (370)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +5 -2
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +2 -2
  7. mindspore/_extends/builtin_operations.py +3 -3
  8. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/__init__.py +3 -3
  11. mindspore/_extends/parse/compile_config.py +24 -1
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
  13. mindspore/_extends/parse/parser.py +28 -22
  14. mindspore/_extends/parse/resources.py +1 -1
  15. mindspore/_extends/parse/standard_method.py +23 -2
  16. mindspore/_extends/parse/trope.py +2 -1
  17. mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
  18. mindspore/amp.py +0 -18
  19. mindspore/avcodec-59.dll +0 -0
  20. mindspore/avdevice-59.dll +0 -0
  21. mindspore/avfilter-8.dll +0 -0
  22. mindspore/avformat-59.dll +0 -0
  23. mindspore/avutil-57.dll +0 -0
  24. mindspore/boost/base.py +29 -2
  25. mindspore/common/__init__.py +18 -12
  26. mindspore/common/_decorator.py +3 -2
  27. mindspore/common/_grad_function.py +3 -1
  28. mindspore/common/_tensor_cpp_method.py +1 -1
  29. mindspore/common/_tensor_docs.py +371 -96
  30. mindspore/common/_utils.py +7 -43
  31. mindspore/common/api.py +434 -135
  32. mindspore/common/dtype.py +98 -57
  33. mindspore/common/dump.py +7 -108
  34. mindspore/common/dynamic_shape/__init__.py +0 -0
  35. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
  36. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  37. mindspore/common/file_system.py +59 -9
  38. mindspore/common/hook_handle.py +82 -3
  39. mindspore/common/jit_config.py +5 -1
  40. mindspore/common/jit_trace.py +27 -12
  41. mindspore/common/lazy_inline.py +5 -3
  42. mindspore/common/np_dtype.py +3 -3
  43. mindspore/common/parameter.py +17 -127
  44. mindspore/common/recompute.py +4 -13
  45. mindspore/common/tensor.py +50 -217
  46. mindspore/communication/_comm_helper.py +11 -1
  47. mindspore/communication/comm_func.py +138 -4
  48. mindspore/communication/management.py +85 -1
  49. mindspore/config/op_info.config +0 -15
  50. mindspore/context.py +20 -106
  51. mindspore/dataset/__init__.py +1 -1
  52. mindspore/dataset/audio/transforms.py +1 -1
  53. mindspore/dataset/core/config.py +35 -1
  54. mindspore/dataset/engine/datasets.py +338 -319
  55. mindspore/dataset/engine/datasets_user_defined.py +38 -22
  56. mindspore/dataset/engine/datasets_vision.py +1 -1
  57. mindspore/dataset/engine/validators.py +1 -15
  58. mindspore/dataset/transforms/c_transforms.py +2 -2
  59. mindspore/dataset/transforms/transforms.py +3 -3
  60. mindspore/dataset/vision/__init__.py +1 -1
  61. mindspore/dataset/vision/py_transforms.py +8 -8
  62. mindspore/dataset/vision/transforms.py +17 -5
  63. mindspore/dataset/vision/utils.py +632 -21
  64. mindspore/device_context/ascend/op_tuning.py +35 -1
  65. mindspore/dnnl.dll +0 -0
  66. mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
  67. mindspore/graph/custom_pass.py +55 -0
  68. mindspore/include/api/cell.h +28 -4
  69. mindspore/include/api/cfg.h +24 -7
  70. mindspore/include/api/context.h +1 -0
  71. mindspore/include/api/delegate.h +0 -2
  72. mindspore/include/api/dual_abi_helper.h +100 -19
  73. mindspore/include/api/graph.h +14 -1
  74. mindspore/include/api/kernel.h +16 -3
  75. mindspore/include/api/kernel_api.h +9 -1
  76. mindspore/include/api/metrics/accuracy.h +9 -0
  77. mindspore/include/api/model.h +5 -1
  78. mindspore/include/api/model_group.h +4 -0
  79. mindspore/include/api/model_parallel_runner.h +2 -0
  80. mindspore/include/api/status.h +48 -10
  81. mindspore/include/api/types.h +6 -1
  82. mindspore/include/dataset/constants.h +9 -0
  83. mindspore/include/dataset/execute.h +2 -2
  84. mindspore/jpeg62.dll +0 -0
  85. mindspore/mindrecord/__init__.py +3 -3
  86. mindspore/mindrecord/common/exceptions.py +1 -0
  87. mindspore/mindrecord/config.py +1 -1
  88. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  89. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  90. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  91. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  92. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  93. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  94. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  95. mindspore/mindrecord/filereader.py +4 -4
  96. mindspore/mindrecord/filewriter.py +5 -5
  97. mindspore/mindrecord/mindpage.py +2 -2
  98. mindspore/mindrecord/tools/cifar10.py +4 -3
  99. mindspore/mindrecord/tools/cifar100.py +1 -1
  100. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  101. mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
  102. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  103. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  104. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  105. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  106. mindspore/mindspore_backend_common.dll +0 -0
  107. mindspore/mindspore_backend_manager.dll +0 -0
  108. mindspore/mindspore_cluster.dll +0 -0
  109. mindspore/mindspore_common.dll +0 -0
  110. mindspore/mindspore_core.dll +0 -0
  111. mindspore/mindspore_cpu.dll +0 -0
  112. mindspore/mindspore_dump.dll +0 -0
  113. mindspore/mindspore_frontend.dll +0 -0
  114. mindspore/mindspore_glog.dll +0 -0
  115. mindspore/mindspore_hardware_abstract.dll +0 -0
  116. mindspore/mindspore_memory_pool.dll +0 -0
  117. mindspore/mindspore_ms_backend.dll +0 -0
  118. mindspore/mindspore_ops.dll +0 -0
  119. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  120. mindspore/mindspore_profiler.dll +0 -0
  121. mindspore/mindspore_pyboost.dll +0 -0
  122. mindspore/mindspore_pynative.dll +0 -0
  123. mindspore/mindspore_runtime_pipeline.dll +0 -0
  124. mindspore/mindspore_runtime_utils.dll +0 -0
  125. mindspore/mindspore_tools.dll +0 -0
  126. mindspore/mint/__init__.py +15 -10
  127. mindspore/mint/distributed/__init__.py +4 -0
  128. mindspore/mint/distributed/distributed.py +392 -69
  129. mindspore/mint/nn/__init__.py +2 -16
  130. mindspore/mint/nn/functional.py +4 -110
  131. mindspore/mint/nn/layer/__init__.py +0 -2
  132. mindspore/mint/nn/layer/_functions.py +1 -2
  133. mindspore/mint/nn/layer/activation.py +0 -6
  134. mindspore/mint/nn/layer/basic.py +0 -47
  135. mindspore/mint/nn/layer/conv.py +10 -10
  136. mindspore/mint/nn/layer/normalization.py +11 -16
  137. mindspore/mint/nn/layer/pooling.py +0 -4
  138. mindspore/nn/__init__.py +1 -3
  139. mindspore/nn/cell.py +231 -239
  140. mindspore/nn/layer/activation.py +4 -2
  141. mindspore/nn/layer/basic.py +56 -14
  142. mindspore/nn/layer/container.py +16 -0
  143. mindspore/nn/layer/embedding.py +4 -169
  144. mindspore/nn/layer/image.py +1 -1
  145. mindspore/nn/layer/normalization.py +2 -1
  146. mindspore/nn/layer/thor_layer.py +4 -85
  147. mindspore/nn/optim/ada_grad.py +0 -1
  148. mindspore/nn/optim/adafactor.py +0 -1
  149. mindspore/nn/optim/adam.py +32 -127
  150. mindspore/nn/optim/adamax.py +0 -1
  151. mindspore/nn/optim/asgd.py +0 -1
  152. mindspore/nn/optim/ftrl.py +8 -102
  153. mindspore/nn/optim/lamb.py +1 -4
  154. mindspore/nn/optim/lars.py +0 -3
  155. mindspore/nn/optim/lazyadam.py +25 -218
  156. mindspore/nn/optim/momentum.py +5 -43
  157. mindspore/nn/optim/optimizer.py +6 -55
  158. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  159. mindspore/nn/optim/rmsprop.py +0 -1
  160. mindspore/nn/optim/rprop.py +0 -1
  161. mindspore/nn/optim/sgd.py +0 -1
  162. mindspore/nn/optim/tft_wrapper.py +2 -4
  163. mindspore/nn/optim/thor.py +0 -2
  164. mindspore/nn/probability/bijector/bijector.py +7 -8
  165. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  166. mindspore/nn/probability/bijector/power_transform.py +20 -21
  167. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  168. mindspore/nn/probability/bijector/softplus.py +13 -14
  169. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  170. mindspore/nn/wrap/cell_wrapper.py +39 -5
  171. mindspore/nn/wrap/grad_reducer.py +4 -89
  172. mindspore/numpy/array_creations.py +4 -4
  173. mindspore/numpy/fft.py +9 -9
  174. mindspore/numpy/utils_const.py +1 -1
  175. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  176. mindspore/onnx/onnx_export.py +137 -0
  177. mindspore/opencv_core4110.dll +0 -0
  178. mindspore/opencv_imgcodecs4110.dll +0 -0
  179. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  180. mindspore/ops/__init__.py +2 -0
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  182. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  183. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  184. mindspore/ops/_op_impl/cpu/__init__.py +1 -5
  185. mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
  186. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
  187. mindspore/ops/auto_generate/gen_extend_func.py +6 -11
  188. mindspore/ops/auto_generate/gen_ops_def.py +385 -154
  189. mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
  190. mindspore/ops/communication.py +97 -0
  191. mindspore/ops/composite/__init__.py +5 -2
  192. mindspore/ops/composite/base.py +16 -2
  193. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  194. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  195. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  196. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  197. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  198. mindspore/ops/function/__init__.py +2 -0
  199. mindspore/ops/function/array_func.py +24 -18
  200. mindspore/ops/function/comm_func.py +3883 -0
  201. mindspore/ops/function/debug_func.py +7 -6
  202. mindspore/ops/function/grad/grad_func.py +4 -12
  203. mindspore/ops/function/math_func.py +89 -86
  204. mindspore/ops/function/nn_func.py +92 -313
  205. mindspore/ops/function/random_func.py +9 -18
  206. mindspore/ops/functional.py +4 -1
  207. mindspore/ops/functional_overload.py +377 -30
  208. mindspore/ops/operations/__init__.py +2 -5
  209. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  210. mindspore/ops/operations/_inner_ops.py +12 -50
  211. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  212. mindspore/ops/operations/array_ops.py +5 -50
  213. mindspore/ops/operations/comm_ops.py +95 -17
  214. mindspore/ops/operations/custom_ops.py +237 -22
  215. mindspore/ops/operations/debug_ops.py +33 -35
  216. mindspore/ops/operations/manually_defined/ops_def.py +39 -318
  217. mindspore/ops/operations/math_ops.py +5 -5
  218. mindspore/ops/operations/nn_ops.py +3 -3
  219. mindspore/ops/operations/sparse_ops.py +0 -83
  220. mindspore/ops/primitive.py +4 -27
  221. mindspore/ops/tensor_method.py +88 -10
  222. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  223. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  224. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  225. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  226. mindspore/ops_generate/common/gen_constants.py +11 -10
  227. mindspore/ops_generate/common/op_proto.py +18 -1
  228. mindspore/ops_generate/common/template.py +102 -245
  229. mindspore/ops_generate/common/template_utils.py +212 -0
  230. mindspore/ops_generate/gen_custom_ops.py +69 -0
  231. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  232. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  233. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  234. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  235. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  236. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  237. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  238. mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
  239. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  240. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  241. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  242. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  243. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  244. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  245. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  246. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  247. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  248. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  249. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  250. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  251. mindspore/parallel/_auto_parallel_context.py +5 -15
  252. mindspore/parallel/_cell_wrapper.py +1 -1
  253. mindspore/parallel/_parallel_serialization.py +4 -6
  254. mindspore/parallel/_ps_context.py +2 -2
  255. mindspore/parallel/_utils.py +34 -17
  256. mindspore/parallel/auto_parallel.py +23 -9
  257. mindspore/parallel/checkpoint_transform.py +20 -2
  258. mindspore/parallel/cluster/process_entity/_api.py +28 -33
  259. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  260. mindspore/parallel/cluster/run.py +5 -3
  261. mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
  262. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  263. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  264. mindspore/parallel/function/reshard_func.py +6 -5
  265. mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
  266. mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
  267. mindspore/parallel/shard.py +7 -21
  268. mindspore/parallel/strategy.py +336 -0
  269. mindspore/parallel/transform_safetensors.py +127 -20
  270. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
  271. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
  272. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  273. mindspore/profiler/common/constant.py +5 -0
  274. mindspore/profiler/common/file_manager.py +9 -0
  275. mindspore/profiler/common/msprof_cmd_tool.py +40 -4
  276. mindspore/profiler/common/path_manager.py +65 -24
  277. mindspore/profiler/common/profiler_context.py +27 -14
  278. mindspore/profiler/common/profiler_info.py +3 -3
  279. mindspore/profiler/common/profiler_meta_data.py +1 -0
  280. mindspore/profiler/common/profiler_op_analyse.py +10 -6
  281. mindspore/profiler/common/profiler_path_manager.py +13 -0
  282. mindspore/profiler/common/util.py +30 -3
  283. mindspore/profiler/dynamic_profiler.py +91 -46
  284. mindspore/profiler/envprofiler.py +30 -5
  285. mindspore/profiler/experimental_config.py +18 -2
  286. mindspore/profiler/platform/cpu_profiler.py +10 -4
  287. mindspore/profiler/platform/npu_profiler.py +34 -7
  288. mindspore/profiler/profiler.py +193 -145
  289. mindspore/profiler/profiler_action_controller.py +1 -1
  290. mindspore/profiler/profiler_interface.py +2 -2
  291. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  292. mindspore/run_check/_check_version.py +108 -24
  293. mindspore/runtime/__init__.py +9 -6
  294. mindspore/runtime/executor.py +35 -0
  295. mindspore/runtime/memory.py +113 -0
  296. mindspore/runtime/thread_bind_core.py +1 -1
  297. mindspore/swresample-4.dll +0 -0
  298. mindspore/swscale-6.dll +0 -0
  299. mindspore/tinyxml2.dll +0 -0
  300. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  301. mindspore/tools/data_dump.py +130 -0
  302. mindspore/tools/sdc_detect.py +91 -0
  303. mindspore/tools/stress_detect.py +63 -0
  304. mindspore/train/__init__.py +6 -6
  305. mindspore/train/_utils.py +8 -21
  306. mindspore/train/amp.py +6 -7
  307. mindspore/train/callback/_callback.py +2 -1
  308. mindspore/train/callback/_checkpoint.py +1 -17
  309. mindspore/train/callback/_flops_collector.py +10 -6
  310. mindspore/train/callback/_train_fault_tolerance.py +72 -25
  311. mindspore/train/data_sink.py +5 -9
  312. mindspore/train/dataset_helper.py +5 -5
  313. mindspore/train/model.py +41 -230
  314. mindspore/train/serialization.py +160 -401
  315. mindspore/train/train_thor/model_thor.py +2 -2
  316. mindspore/turbojpeg.dll +0 -0
  317. mindspore/utils/__init__.py +6 -3
  318. mindspore/utils/dlpack.py +92 -0
  319. mindspore/utils/dryrun.py +1 -1
  320. mindspore/utils/runtime_execution_order_check.py +10 -0
  321. mindspore/utils/sdc_detect.py +14 -12
  322. mindspore/utils/stress_detect.py +43 -0
  323. mindspore/utils/utils.py +152 -16
  324. mindspore/version.py +1 -1
  325. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  326. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
  327. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  328. mindspore/communication/_hccl_management.py +0 -297
  329. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
  330. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  331. mindspore/experimental/llm_boost/atb/__init__.py +0 -23
  332. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  333. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  334. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  335. mindspore/experimental/llm_boost/register.py +0 -130
  336. mindspore/experimental/llm_boost/utils.py +0 -31
  337. mindspore/include/OWNERS +0 -7
  338. mindspore/mindspore_cpu_res_manager.dll +0 -0
  339. mindspore/mindspore_ops_kernel_common.dll +0 -0
  340. mindspore/mindspore_res_manager.dll +0 -0
  341. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  342. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  343. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  344. mindspore/nn/reinforcement/tensor_array.py +0 -145
  345. mindspore/opencv_core452.dll +0 -0
  346. mindspore/opencv_imgcodecs452.dll +0 -0
  347. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  348. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  349. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  350. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  351. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  352. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  353. mindspore/ops/operations/_tensor_array.py +0 -359
  354. mindspore/ops/operations/rl_ops.py +0 -288
  355. mindspore/parallel/_offload_context.py +0 -275
  356. mindspore/parallel/_recovery_context.py +0 -115
  357. mindspore/parallel/_transformer/__init__.py +0 -35
  358. mindspore/parallel/_transformer/layers.py +0 -765
  359. mindspore/parallel/_transformer/loss.py +0 -251
  360. mindspore/parallel/_transformer/moe.py +0 -693
  361. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  362. mindspore/parallel/_transformer/transformer.py +0 -3124
  363. mindspore/parallel/mpi/_mpi_config.py +0 -116
  364. mindspore/profiler/common/validator/validate_path.py +0 -84
  365. mindspore/train/memory_profiling_pb2.py +0 -298
  366. mindspore/utils/hooks.py +0 -81
  367. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  368. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  369. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  370. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
mindspore/parallel/strategy.py
@@ -0,0 +1,336 @@
+ # Copyright 2025 Huawei Technologies Co., Ltd
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ============================================================================
+ """Checkpoint strategy info"""
+ from __future__ import absolute_import
+
+ __all__ = ["get_strategy_metadata", "get_current_strategy_metadata", "enable_save_strategy_online", \
+            "clear_strategy_metadata"]
+
+ from itertools import chain
+ from typing import Sequence, Union, Tuple, List, Dict
+ from types import SimpleNamespace
+
+ import numpy as np
+
+ from mindspore import log as logger
+ from mindspore._c_expression import StrategyInfo
+ from mindspore._c_expression import StrategyLayout
+ from mindspore.parallel.shard import Layout
+
+ LayoutInfo = Tuple[Layout, str, str]
+ StrOrTuple = Union[str, Tuple["StrOrTuple", ...], List["StrOrTuple"]]
+
+
+ def get_strategy_metadata(network, rank_id=None) -> Dict[int, Dict[str, List[LayoutInfo]]]:
+     """
+     Get the parameter sharding strategy info of this network, for all ranks or for a specific rank.
+     For more information on layouts, please refer to: :class:`mindspore.parallel.Layout`.
+
+     Args:
+         network (Cell): The network whose strategy metadata is queried.
+         rank_id (int, optional): The rank id of the process on which this cell will be launched.
+             Defaults to ``None``, which means strategy metadata for all ranks will be returned.
+
+     Returns:
+         Dict. A dictionary containing the parameter slicing strategies for either all ranks or a specific rank.
+         The key is `rank_id`, and the value is the slicing strategy for all parameters on that rank.
+         Within each rank's strategy, the key is the parameter name, and the value is the slicing strategy.
+         If a `rank_id` is specified, the dictionary returns the strategy information for that specific rank.
+         Otherwise, it returns the strategy information for all ranks in the network. If not supported, returns None.
+
+     Examples:
+         >>> import mindspore as ms
+         >>> from mindspore import nn
+         >>> from mindspore.communication import init
+         >>> from mindspore.nn.utils import no_init_parameters
+         >>> from mindspore.parallel.auto_parallel import AutoParallel
+         >>> from mindspore.train import Model
+         >>> from mindspore.parallel.strategy import (get_strategy_metadata, get_current_strategy_metadata,
+         ...                                          enable_save_strategy_online, clear_strategy_metadata)
+         >>>
+         >>> ms.set_context(mode=ms.GRAPH_MODE)
+         >>> init()
+         >>> ms.set_seed(1)
+         >>>
+         >>> # Define the network structure of LeNet5. Refer to
+         >>> # https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
+         >>> with no_init_parameters():
+         ...     net = LeNet5()
+         ...     optim = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
+         >>>
+         >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+         >>> train_net = AutoParallel(net, parallel_mode="semi_auto")
+         >>> model = Model(network=train_net, loss_fn=loss, optimizer=optim, metrics=None)
+         >>>
+         >>> # Create the dataset taking MNIST as an example. Refer to
+         >>> # https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
+         >>> dataset = create_dataset()
+         >>>
+         >>> enable_save_strategy_online()
+         >>> model.train(2, dataset)
+         >>>
+         >>> global_info = get_strategy_metadata(network=model.train_network)
+         >>> rank0_info = get_strategy_metadata(network=model.train_network, rank_id=0)
+         >>> local_info = get_current_strategy_metadata(network=model.train_network)
+         >>> clear_strategy_metadata()
+     """
+     return _NetStrategyInfo(network, global_layout=None, local_layout=None).get_rank_layout(rank_id)
+
+
+ def get_current_strategy_metadata(network) -> Dict[int, Dict[str, List[LayoutInfo]]]:
+     """
+     Get the parameter layout dictionary of the current rank of the network.
+
+     Args:
+         network (Cell): The network whose strategy metadata is queried.
+
+     Returns:
+         Dict. The key is 0 (representing the local rank), and the value is the slicing strategy for all parameters.
+         The key within the value represents the parameter name, and the value is the corresponding slicing strategy \
+         for that parameter. If not supported, returns None.
+     """
+     return _NetStrategyInfo(network, global_layout=None, local_layout=None).get_local_rank_layout()
+
+
+ def enable_save_strategy_online():
+     """
+     Enable saving strategy metadata online.
+     """
+     strategy_layout_handle = StrategyLayout.get_instance()
+     if strategy_layout_handle is None:
+         raise ValueError("Strategy layout handle is none in parallel_strategy_checkpoint!!!")
+     strategy_layout_handle.enable_save_strategy_online()
+
+
+ def clear_strategy_metadata():
+     """Clear all saved strategy metadata on the C++ side."""
+     strategy_layout_handle = StrategyLayout.get_instance()
+     if strategy_layout_handle is None:
+         raise ValueError("Strategy layout handle is none in parallel_strategy_checkpoint!!!")
+     return strategy_layout_handle.clear_strategy_metadata()
+
+
+ class _NetStrategyInfo:
+     """
+     Describe the strategy information of a network.
+     """
+
+     def __init__(self, network, global_layout=None, local_layout=None):
+         self._network = network
+         self._compile_phase = network.compile_phase
+         if global_layout is None or local_layout is None:
+             layout_handle = self._get_layout_handle()
+             global_layout = layout_handle.global_network_layout()
+             local_layout = layout_handle.local_network_layout()
+         self._raw_global_layout = global_layout
+         self._raw_local_layout = local_layout
+
+     @staticmethod
+     def _get_layout_handle():
+         """Get strategy handle"""
+         layout_handle = StrategyLayout.get_instance()
+         if layout_handle is None:
+             raise ValueError("Strategy layout handle is none in parallel_strategy_checkpoint!!!")
+         return layout_handle
+
+     def get_rank_layout(self, rank_id=None):
+         """Get the params layout of the network, for all ranks or for a specific rank."""
+         raw_global_layout = self._get_valid_layout(self._compile_phase, self._raw_global_layout)
+         if raw_global_layout is None:
+             return None
+         global_layout = self._extract_layout_metadata(raw_global_layout)
+         if rank_id is not None:
+             cur_rank_layout = {rank_id: global_layout[rank_id]}
+             self._layout_to_string(cur_rank_layout)
+             return cur_rank_layout
+         self._layout_to_string(global_layout)
+         return global_layout
+
+     def get_local_rank_layout(self):
+         """Get local rank params of the network, {param_name: param_info[layout]}."""
+         raw_local_layout = self._get_valid_layout(self._compile_phase, self._raw_local_layout)
+         if raw_local_layout is None:
+             return None
+         local_layout = self._extract_layout_metadata(raw_local_layout)
+         self._layout_to_string(local_layout)
+         return local_layout
+
+     @staticmethod
+     def _get_valid_layout(phase, layout_dict):
+         """Helper: Validate and extract layout by phase."""
+         if not phase:
+             return None
+         layout = layout_dict.get(phase)
+         if not layout or all(not v for v in layout.values()):
+             return None
+         return layout
+
+     def _extract_layout_metadata(self, layout: Dict[int, Dict[str, StrategyInfo]]) -> Dict:
+         """Return the processed layout of the given network."""
+         new_layout = {}
+         for rank_id, param_dict in layout.items():
+             new_param_info = {}
+             for param_name, param_info in param_dict.items():
+                 new_param_layout = self._layout_process(param_info)
+                 new_param_info[param_name] = new_param_layout
+             new_layout[rank_id] = new_param_info
+         return new_layout
+
+     def _layout_process(self, stra_layout):
+         """
+         Return the layout list; stra_layout is one of the param infos of the current rank.
+         """
+         new_dev_mat, counter, new_tensor_map, full_opt_shard = self._get_dev_mat_for_opt_shard(
+             stra_layout.opt_weight_shard_size, stra_layout.dev_matrix, stra_layout.tensor_map)
+         alphabet = 'abcdefghijklmnopqrstuvwxyz'
+         alias_name = [alphabet[i] for i in range(len(new_dev_mat))]
+         if stra_layout.opt_weight_shard_size == 0:
+             new_tensor_map = tuple(tuple(alias_name[len(alias_name) - idx - 1] if idx != -1 else "None" for idx in sub)
+                                    for sub in new_tensor_map)
+         else:
+             info = SimpleNamespace(
+                 new_dev_mat=new_dev_mat,
+                 new_tensor_map=new_tensor_map,
+                 full_opt_shard=full_opt_shard,
+                 counter=counter,
+                 alias_name=alias_name
+             )
+             new_tensor_map = self._get_tensor_map_for_opt_shard(info)
+         new_tensor_map = self._compact_tensor_map(new_tensor_map)
+         new_dev_mat = tuple(new_dev_mat)
+         alias_name = tuple(alias_name)
+         layout = Layout(new_dev_mat, alias_name, stra_layout.rank_list)
+         final_layout = layout(*new_tensor_map)
+         logger.debug("The final layout is %s", final_layout.to_dict())
+         cur_param_list = [final_layout, stra_layout.tensor_type, stra_layout.tensor_shape]
+         return cur_param_list
+
+     def _get_dev_mat_for_opt_shard(self, opt_shard, dev_mat, tensor_map):
+         """generate device matrix for opt shard scenario"""
+         if opt_shard == 0:
+             return dev_mat, -1, tensor_map, True
+         used_dev_num = self._calc_used_dev_num(dev_mat, tensor_map)
+         total_dev_num = int(np.prod(np.array(dev_mat)))
+         if opt_shard == -1 or used_dev_num * opt_shard == total_dev_num:
+             return dev_mat, -1, tensor_map, True
+         remain_dev_num = total_dev_num // (used_dev_num * opt_shard)
+         used_dev_mat_mask = self._get_used_dev_mat(dev_mat, tensor_map)
+         info = SimpleNamespace(
+             dev_mat=dev_mat,
+             tensor_map=tensor_map,
+             counter=-1,
+             real_remain_dev_num=1,
+             remain_dev_num=remain_dev_num
+         )
+         for axis, value in enumerate(dev_mat):
+             if used_dev_mat_mask[axis]:
+                 continue
+             info.counter = axis
+             if info.real_remain_dev_num == info.remain_dev_num:
+                 return dev_mat, axis, tensor_map, False
+             if info.real_remain_dev_num < info.remain_dev_num:
+                 info.real_remain_dev_num *= value
+                 continue
+             # info.real_remain_dev_num > info.remain_dev_num, split the axis.
+             return self._split_dev_dim(info)
+         if info.real_remain_dev_num == info.remain_dev_num:
+             return dev_mat, info.counter, tensor_map, False
+         return self._split_dev_dim(info)
+
+     def _get_tensor_map_for_opt_shard(self, info: SimpleNamespace):
+         """generate tensor map for opt shard scenario"""
+
+         def idx_to_alias(idx):
+             return "None" if idx == -1 else info.alias_name[len(info.alias_name) - idx - 1]
+
+         def entry_to_alias(entry):
+             if isinstance(entry, (list, tuple)):
+                 return tuple(idx_to_alias(i) for i in entry)
+             return idx_to_alias(entry)
+
+         used_dev_mat = self._get_used_dev_mat(info.new_dev_mat, info.new_tensor_map)
+         if info.full_opt_shard:
+             unused_idx = [len(used_dev_mat) - i - 1 for i, used in enumerate(used_dev_mat) if not used]
+         else:
+             unused_idx = [len(used_dev_mat) - i - 1 for i, used in enumerate(used_dev_mat) if
+                           not used and i > info.counter]
+         first_entry = info.new_tensor_map[0]
+         first_list = list(first_entry) if isinstance(first_entry, (list, tuple)) else [first_entry]
+         new_first_list = [dim for dim in first_list + unused_idx if dim != -1]
+         first_alias_list = [idx_to_alias(i) for i in new_first_list] or ["None"]
+         first_alias = first_alias_list[0] if len(first_alias_list) == 1 else tuple(first_alias_list)
+         rest_alias = [entry_to_alias(entry) for entry in info.new_tensor_map[1:]]
+         new_tensor_map = tuple([first_alias] + rest_alias)
+         return new_tensor_map
+
+     @staticmethod
+     def _split_dev_dim(info: SimpleNamespace):
+         """Split the counter dimension of dev_mat and adjust tensor_map."""
+         dev_mat = info.dev_mat
+         counter = info.counter
+         splitted_dev_value = dev_mat[counter]
+         new_dev_mat_value_first = info.remain_dev_num // (info.real_remain_dev_num // splitted_dev_value)
+         new_dev_mat_value_second = splitted_dev_value // new_dev_mat_value_first
+         new_dev_mat = dev_mat[:counter] + [new_dev_mat_value_first, new_dev_mat_value_second] + dev_mat[counter + 1:]
+         flag = len(new_dev_mat) - 1 - counter
+         new_tensor_map = [[v if v < flag or v == -1 else v + 1 for v in sub] for sub in info.tensor_map]
+         return new_dev_mat, counter, new_tensor_map, False
+
+     @staticmethod
+     def _calc_used_dev_num(dev_mat, tensor_map):
+         """Count the total number of devices that have been used."""
+         idx_flat = [idx for idx in chain.from_iterable(tensor_map) if idx != -1]
+         if not idx_flat:
+             return 1
+         prod_list = [dev_mat[len(dev_mat) - idx - 1] for idx in idx_flat]
+         return int(np.prod(prod_list))
+
+     @staticmethod
+     def _get_used_dev_mat(dev_mat, tensor_map) -> List[bool]:
+         """List that records whether each device-matrix axis is being used or not."""
+         used = set()
+         for elem in tensor_map:
+             if isinstance(elem, (list, tuple)):
+                 used.update(i for i in elem if i != -1)
+             elif elem != -1:
+                 used.add(elem)
+         return [(len(dev_mat) - i - 1) in used for i in range(len(dev_mat))]
+
+     @staticmethod
+     def _compact_tensor_map(alias_map: Sequence[StrOrTuple]) -> Tuple[StrOrTuple, ...]:
+         """Compress tensor-map entries: unwrap singletons and collapse all-'None' tuples."""
+
+         def _compress(elem: StrOrTuple) -> StrOrTuple:
+             if isinstance(elem, (list, tuple)):
+                 compressed = tuple(_compress(e) for e in elem)
+                 if len(compressed) == 1:
+                     return compressed[0]
+                 if all(x == 'None' for x in compressed):
+                     return 'None'
+                 return compressed
+             return elem
+
+         return tuple(_compress(e) for e in alias_map)
+
+     @staticmethod
+     def _layout_to_string(layout_info):
+         """Log layout info."""
+         for rank_id, param_layout in layout_info.items():
+             logger.info("rank_id=%s", rank_id)
+             for param_name, cur_param_list in param_layout.items():
+                 final_layout, param_type, global_shape = cur_param_list
+                 logger.info("param_name=%s: [param_layout=%s, param_type=%s, global_shape=%s]",
+                             param_name, final_layout.to_dict(), param_type, global_shape)
+             logger.info("\n")
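The alias-naming convention in `_layout_process` above is the least obvious part of this new file: device-matrix axes are named `'a'`, `'b'`, ... left to right, while `tensor_map` indices count axes from the *end* of the device matrix, with `-1` meaning "not sharded". A minimal standalone sketch (the `to_alias_map` helper and sample values are illustrative, not MindSpore API):

```python
# Illustrative sketch of the index-to-alias mapping used by _layout_process:
# tensor_map index i names the device-matrix axis counted from the right.
alphabet = 'abcdefghijklmnopqrstuvwxyz'

def to_alias_map(dev_mat, tensor_map):
    alias_name = [alphabet[i] for i in range(len(dev_mat))]
    return tuple(
        tuple(alias_name[len(alias_name) - idx - 1] if idx != -1 else "None" for idx in sub)
        for sub in tensor_map
    )

# 2x4 device matrix: dim 0 sharded over axis 'a' (tensor_map index 1),
# dim 1 replicated.
print(to_alias_map([2, 4], [[1], [-1]]))  # (('a',), ('None',))
```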
mindspore/parallel/transform_safetensors.py
@@ -15,6 +15,7 @@
   """Transform distributed safetensors"""
   from __future__ import absolute_import
  
+ import copy
   import os
   import sys
   import glob
@@ -37,7 +38,6 @@ from mindspore import log as logger
   from mindspore.log import vlog_print
   from mindspore.common.parameter import Parameter
   from mindspore.common.tensor import Tensor
- from mindspore.common import np_dtype
   from mindspore.parallel._parallel_serialization import _get_device_num_from_strategy, _make_dir, \
       _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
       _insert_opt_shard_reshape, _extract_src_dst_layout_map_by_src, _insert_expand_layout_reshape
@@ -51,8 +51,6 @@ from mindspore.common import dtype as mstype
  
   safetensors_to_mstype = {'Int4': mstype.qint4x2}
  
- np.bfloat16 = np_dtype.bfloat16
-
   MAX_HEADER_SIZE = 100 * 1000 * 1000
  
   dtype_size = {
@@ -71,6 +69,7 @@ dtype_size = {
       "F64": 8,
   }
   np_dtype_size = {
+     "bool": 1,
       "bool_": 1,
       "uint8": 1,
       "int8": 1,
@@ -96,7 +95,6 @@ numpy_dtype = {
       "I64": np.int64,
       "U64": np.uint64,
       "F16": np.float16,
-     "BF16": np.bfloat16,  # no bf16
       "F32": np.float32,
       "F64": np.float64,
   }
@@ -215,6 +213,16 @@ class PySafeSlice:
  
       @property
       def dtype(self):
+         """Get dtype by numpy_dtype"""
+         if self.info["dtype"] == "BF16":
+             from mindspore.common import np_dtype
+             if not np_dtype.np_dtype_valid(True):
+                 raise TypeError(
+                     "The NumPy bfloat16 data type is not supported now, please ensure that the current "
+                     "NumPy version is not less than the version used when MindSpore was compiled, "
+                     "and that the major versions are the same."
+                 )
+             return np_dtype.bfloat16
           return numpy_dtype[self.info["dtype"]]
  
       @property
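The new `dtype` property defers the bfloat16 check until a BF16 tensor is actually read, instead of patching `np.bfloat16` at import time. A standalone sketch of the same guard pattern (`numpy_dtype` here is a trimmed stand-in for the module table above, and the `np_dtype.np_dtype_valid(True)` check is replaced by an unconditional raise):

```python
import numpy as np

# Illustrative subset of the module-level lookup table.
numpy_dtype = {"F16": np.float16, "F32": np.float32, "F64": np.float64}

def resolve_dtype(tag):
    """Resolve a safetensors dtype tag lazily, failing only when BF16 is hit."""
    if tag == "BF16":
        # Stand-in for the np_dtype.np_dtype_valid(True) check: plain NumPy
        # has no bfloat16, so reading a BF16 tensor must raise.
        raise TypeError("The NumPy bfloat16 data type is not supported now.")
    return numpy_dtype[tag]

print(resolve_dtype("F32"))  # <class 'numpy.float32'>
```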
@@ -690,6 +698,8 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
       else:
           if transform_param_dict:
               if output_format == "safetensors":
+                 if meta_data and "remove_redundancy" in meta_data:
+                     meta_data["remove_redundancy"] = "False"
                   _save_file_atomically(transform_param_dict, save_file_name, metadata=meta_data)
               else:
                   transform_param_dict = _load_and_transform(transform_param_dict, None, None,
@@ -759,6 +769,11 @@ def transform_safetensors_by_stage(src_safetensors_dir, dst_safetensors_dir, ckp
               param_type_dict[param_name][src_rank] = str(param.data.dtype)
               param_total_dict[param_name][src_rank] = param
               param_attr_dict[param_name][src_rank] = (True, False)
+
+     ckpt_prefix = os.path.basename(ckpt_prefix)
+     if '..' in ckpt_prefix or '/' in ckpt_prefix or '\\' in ckpt_prefix:
+         raise ValueError(f"Invalid ckpt_prefix: {ckpt_prefix}. Must not contain path traversal characters.")
+
       for local_rank_id in range(dst_stage_device_num):
           transform_param_dict = _transform_parallel_safetensor(local_rank_id, param_total_dict,
                                                                 param_attr_dict, src_strategy_list, dst_strategy_list,
@@ -776,6 +791,7 @@ def transform_safetensors_by_rank(rank_id, safetensor_files_map, save_safetensor
       """
       Transform distributed checkpoint from source sharding strategy to destination sharding strategy by rank.
       """
+     save_safetensor_file_name = os.path.abspath(save_safetensor_file_name)
       if not isinstance(safetensor_files_map, dict):
           raise TypeError("The safetensor_files_map should be a dict.")
       if not isinstance(rank_id, int):
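The `ckpt_prefix` hardening above composes two steps: strip any directory component, then reject anything that still looks like traversal. A standalone sketch (`sanitize_prefix` is an illustrative name):

```python
import os

def sanitize_prefix(prefix):
    """Mirror the ckpt_prefix check added above: basename, then reject traversal."""
    prefix = os.path.basename(prefix)
    if '..' in prefix or '/' in prefix or '\\' in prefix:
        raise ValueError(f"Invalid ckpt_prefix: {prefix}. "
                         "Must not contain path traversal characters.")
    return prefix

print(sanitize_prefix("checkpoint"))      # checkpoint
print(sanitize_prefix("dir/checkpoint"))  # checkpoint (directory part stripped)
# sanitize_prefix("..") raises ValueError: basename keeps '..' intact.
```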
@@ -823,11 +839,84 @@
       _save_file_atomically(transform_param_dict, save_safetensor_file_name, metadata={"format": "ms"})
  
  
- def _extrace_number(file_name):
-     """get file last two number"""
-     number_ls = re.findall(r'\d+', file_name)
-     number_ls = [int(i) for i in number_ls]
-     return number_ls[-2:]
+ def _extract_numbers(s):
+     """Extract all numbers from a string and convert them to integers."""
+     return [int(num) for num in re.findall(r'\d+', s)]
+
+
+ def _extract_last_two_numbers(file_name):
+     """Get the last two numbers from a filename."""
+     all_numbers = _extract_numbers(file_name)
+     return all_numbers[-2:]
+
+
+ def _find_shortest_file(matched_files, rank_ckpts, new_file_suffix, file_suffix):
+     """Find the shortest file from a list of matched files."""
+     min_length = min(len(os.path.basename(ckpt)) for ckpt in matched_files)
+     shortest_files = [ckpt for ckpt in matched_files if len(os.path.basename(ckpt)) == min_length]
+     if len(shortest_files) == 1:
+         return shortest_files[0]
+     raise ValueError(f"Multiple files with suffix '{file_suffix}' found in {rank_ckpts}. Following MindSpore naming "
+                      f"rules, searched for files ending with '{new_file_suffix}' but found multiple "
+                      f"files {matched_files}. Then searched for the shortest filename, but found multiple shortest "
+                      f"files {shortest_files}. Please set file_suffix to the longest common suffix of all files.")
+
+
+ def _get_matched_file(matched, rank_ckpts, new_file_suffix, file_suffix):
+     """Get the file from a list of matched files."""
+     if len(matched) == 1:
+         return matched[0]
+     if len(matched) > 1:
+         return _find_shortest_file(matched, rank_ckpts, new_file_suffix, file_suffix)
+     raise ValueError(f"Multiple files with suffix '{file_suffix}' found in {rank_ckpts}. Following MindSpore naming "
+                      f"rules, searched for files ending with '{new_file_suffix}' but found zero files. "
+                      f"Please set file_suffix to the longest common suffix of all files.")
+
+
+ def _find_most_matching_file(rank_ckpts, file_suffix, format):
+     """Finds the most matching checkpoint file based on the file_suffix."""
+     if file_suffix is None:
+         rank_ckpts.sort(key=_extract_last_two_numbers)
+         return rank_ckpts[-1]
+
+     new_file_suffix = file_suffix
+     pattern1 = rf'^_(\d+)-(\d+)_(\d+)$'
+     matches1 = re.search(pattern1, file_suffix)
+     pattern2 = rf'^(\d+)-(\d+)_(\d+)$'
+     matches2 = re.search(pattern2, file_suffix)
+     # Pattern matching for _{task_id}-{epoch}_{step} format (e.g., _1-10_100 or 1-10_100)
+     if matches1 is not None or matches2 is not None:
+         if matches2 is not None:
+             new_file_suffix = "_" + new_file_suffix
+         matched = [ckpt for ckpt in rank_ckpts if ckpt.endswith(f"{new_file_suffix}.{format}") and
+                    not ckpt.endswith(f"rank{new_file_suffix}.{format}")]
+         return _get_matched_file(matched, rank_ckpts, new_file_suffix, file_suffix)
+
+     pattern3 = rf'^-(\d+)_(\d+)$'
+     matches3 = re.search(pattern3, file_suffix)
+     pattern4 = rf'^(\d+)_(\d+)$'
+     matches4 = re.search(pattern4, file_suffix)
+     # Pattern matching for -{epoch}_{step} format (e.g., -10_100 or 10_100)
+     if matches3 is not None or matches4 is not None:
+         if matches4 is not None:
+             new_file_suffix = "-" + new_file_suffix
+         matched = [ckpt for ckpt in rank_ckpts if ckpt.endswith(f"{new_file_suffix}.{format}")]
+         return _get_matched_file(matched, rank_ckpts, new_file_suffix, file_suffix)
+
+     pattern5 = rf'^_(\d+)$'
+     matches5 = re.search(pattern5, file_suffix)
+     pattern6 = rf'^(\d+)$'
+     matches6 = re.search(pattern6, file_suffix)
+     # Pattern matching for _{step} format (e.g., _100 or 100)
+     if matches5 is not None or matches6 is not None:
+         if matches6 is not None:
+             new_file_suffix = "_" + new_file_suffix
+         matched = [ckpt for ckpt in rank_ckpts if ckpt.endswith(f"{new_file_suffix}.{format}")]
+         return _get_matched_file(matched, rank_ckpts, new_file_suffix, file_suffix)
+
+     raise ValueError(f"Multiple {format} files ending with '{file_suffix}' found in {rank_ckpts}. "
+                      f"Cannot determine which file is the intended one. "
+                      f"Please set file_suffix to the longest common suffix.")
  
  
   def _collect_safetensor_files(src_safetensors_dir, format='safetensors', file_suffix=None):
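The replacement selection logic recognizes three suffix shapes before falling back to an error. A quick standalone check of that classification, reusing the same regular expressions (the sample `suffixes` are illustrative):

```python
import re

patterns = {
    "{task_id}-{epoch}_{step}": (r'^_(\d+)-(\d+)_(\d+)$', r'^(\d+)-(\d+)_(\d+)$'),
    "{epoch}_{step}":           (r'^-(\d+)_(\d+)$',       r'^(\d+)_(\d+)$'),
    "{step}":                   (r'^_(\d+)$',             r'^(\d+)$'),
}

for suffix in ["_1-10_100", "1-10_100", "-10_100", "10_100", "_100", "100", "best"]:
    kind = next((name for name, pair in patterns.items()
                 if any(re.search(p, suffix) for p in pair)), "unrecognized")
    print(f"{suffix!r}: {kind}")
# '_1-10_100'/'1-10_100' -> {task_id}-{epoch}_{step}; '-10_100'/'10_100' ->
# {epoch}_{step}; '_100'/'100' -> {step}; 'best' -> unrecognized.
```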
@@ -838,6 +927,9 @@
           return {0: src_safetensors_dir}
       safetensors_rank_dir_list = os.path.join(src_safetensors_dir, "rank_[0-9]*")
       all_safetensor_files_map = {}
+     multiple_files_found_flag = False
+     multiple_files_list = None
+     chosen_file = None
       for safetensor_dir in glob.glob(safetensors_rank_dir_list):
           if not os.path.isdir(safetensor_dir):
               ms.log.warning("{} is not a directory.".format(safetensor_dir))
@@ -853,9 +945,23 @@
           else:
               safetensor_file_name = os.path.join(safetensor_dir, f"*{file_suffix}.{format}")
           rank_ckpts = glob.glob(safetensor_file_name)
-         rank_ckpts.sort(key=_extrace_number)
-         if rank_ckpts:
-             all_safetensor_files_map[rank_id] = rank_ckpts[-1]
+         if len(rank_ckpts) > 1:
+             all_safetensor_files_map[rank_id] = _find_most_matching_file(rank_ckpts, file_suffix, format)
+             if not multiple_files_found_flag:
+                 multiple_files_found_flag = True
+                 multiple_files_list = copy.deepcopy(rank_ckpts)
+                 chosen_file = all_safetensor_files_map[rank_id]
+         elif rank_ckpts:
+             all_safetensor_files_map[rank_id] = rank_ckpts[0]
+         elif file_suffix is not None:
+             raise ValueError(f"No safetensors files found in directory '{safetensor_dir}' "
+                              f"with suffix '{file_suffix}' and format '{format}'. "
+                              f"Please verify the directory contains the expected files. "
+                              f"Recommend setting file_suffix to the longest common suffix.")
+     if file_suffix is not None and multiple_files_found_flag:
+         logger.warning(f"When unifying safetensors files with file_suffix `{file_suffix}`, multiple files were "
+                        f"found. Showing one list: {multiple_files_list}; selected `{chosen_file}` from it. "
+                        f"Please check whether the file_suffix is set correctly.")
       return all_safetensor_files_map
  
  
@@ -972,7 +1078,7 @@
   def _cal_param_size(shape, dtype):
       """cal param size by dtype and shape"""
       num_elements = math.prod(shape)
-     element_size = np_dtype_size.get(dtype, 4)
+     element_size = np_dtype_size.get(str(dtype), 4)
       total_bytes = num_elements * element_size
       return total_bytes
  
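The one-character fix above matters because `np_dtype_size` is keyed by strings, while an ndarray's `.dtype` is a `numpy.dtype` object: the object compares equal to its name but does not hash like it, so the plain lookup silently fell back to the 4-byte default. A standalone illustration (the two-entry table is a stand-in for the full `np_dtype_size`):

```python
import numpy as np

np_dtype_size = {"float16": 2, "float32": 4}  # string-keyed, as above
dt = np.zeros(1, dtype=np.float16).dtype      # dtype('float16')

print(dt == "float16")                # True: equality works across types
print(np_dtype_size.get(dt, 4))       # 4: hash mismatch, silent wrong size
print(np_dtype_size.get(str(dt), 4))  # 2: str(dt) == 'float16'
```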
@@ -1135,7 +1241,7 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundan
       if os.path.isfile(src_dir):
           raise ValueError("For 'unified_safetensors', the 'src_dir' can not be a file.")
       all_safetensor_files_map = _collect_safetensor_files(src_dir, format="safetensors", file_suffix=file_suffix)
-     all_ckpt_files_map = _collect_safetensor_files(src_dir, format="ckpt", file_suffix=file_suffix)
+     all_ckpt_files_map = _collect_safetensor_files(src_dir, format="ckpt")
       if all_safetensor_files_map and all_ckpt_files_map:
           raise ValueError("For 'unified_safetensors', the 'src_dir' cannot contain "
                            "both ckpt file and safetensors file simultaneously")
@@ -1173,11 +1279,6 @@
       with _fast_safe_open(file_name, framework="np") as f:
           for k in f.keys():
               if k in name_list:
-                 py_slice = f.get_tensor(k)
-                 param_total_size += _cal_param_size(py_slice.shape, py_slice.dtype)
-                 param_dst_shape = _get_dst_shape(k, py_slice.shape, origin_src_strategy_list)
-                 # Convert the shape of np.int32 type to int type to prevent overflow in subsequent calculations.
-                 param_dst_shape = [int(item) for item in param_dst_shape]
                   if choice_func is not None:
                       choice_out = choice_func(k)
                       if isinstance(choice_out, bool):
@@ -1185,7 +1286,13 @@
                           name_list.remove(k)
                           continue
                   if k not in param_size_dict:
-                     param_size_dict[k] = _cal_param_size(param_dst_shape, py_slice.dtype)
+                     py_slice = f.get_tensor(k)
+                     param_dst_shape = _get_dst_shape(k, py_slice.shape, origin_src_strategy_list)
+                     # Convert the shape of np.int32 type to int type to prevent overflow in subsequent calculations.
+                     param_dst_shape = [int(item) for item in param_dst_shape]
+                     param_size = _cal_param_size(param_dst_shape, py_slice.dtype)
+                     param_total_size += param_size
+                     param_size_dict[k] = param_size
       split_num = math.ceil(sum(param_size_dict.values()) / 1024 / 1024 / 1024 / 3)
       split_num = min(split_num, len(name_list))
       split_list = _split_weight_dict(param_size_dict, split_num)
mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py
@@ -13,7 +13,6 @@
   # limitations under the License.
   # ============================================================================
   """Ascend kernel details viewer"""
- import csv
   import os
   from decimal import Decimal
  
@@ -25,8 +24,7 @@ from mindspore.profiler.common.constant import (
       ProfilerActivity
   )
   from mindspore.profiler.common.log import ProfilerLogger
- from mindspore.profiler.common.path_manager import PathManager
-
+ from mindspore.profiler.common.file_manager import FileManager
   from mindspore import log as logger
  
  
@@ -110,12 +108,15 @@ class AscendKernelDetailsViewer(BaseViewer):
           Write data to csv file.
           """
           self._logger.info("Kernel details saved start")
-         PathManager.check_directory_path_writeable(os.path.dirname(self._save_path))
-         with open(self._save_path, "w", newline="", encoding="utf-8") as csvfile:
-             writer = csv.writer(csvfile)
-             writer.writerow(self.kernel_details_headers)
-             for row in self.op_summary:
-                 writer.writerow([row[field] for field in self.op_summary_headers])
+         csv_data = []
+         for row in self.op_summary:
+             csv_row = [row[field] for field in self.op_summary_headers]
+             csv_data.append(csv_row)
+         FileManager.create_csv_file(
+             file_path=self._save_path,
+             data=csv_data,
+             headers=self.kernel_details_headers
+         )
           self._logger.info("Kernel details saved done")
  
       def _update_headers(self):
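The refactor above routes CSV output through `FileManager.create_csv_file` instead of driving `csv.writer` inline. For reference, a standalone equivalent reconstructed from the removed lines (assuming, as the call site suggests, that `create_csv_file` writes a header row followed by the data rows):

```python
import csv

def create_csv_file(file_path, data, headers):
    """Sketch of the replaced behavior: header row, then one row per record."""
    with open(file_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(headers)
        writer.writerows(data)
```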
@@ -247,4 +248,7 @@ def _get_step_id_by_ts(ts: Decimal, step_events_dict: dict):
           if st <= ts <= et:
               return step_id
  
+     if step_events_dict:
+         return list(step_events_dict.keys())[-1]
+
       return None
mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py
@@ -330,7 +330,7 @@ class AscendOpMemoryViewer:
               res.append(self._combine_alloc_and_free_event(alloc_event, free_event))
               alloc_event, free_event = None, None
           elif alloc_event is None and free_event:
-             self._logger.error("Alloc event is None, but free event is not None")
+             self._logger.warning("Alloc event is None, but free event is not None")
  
       if alloc_event:
           res.append(self._combine_alloc_and_free_event(alloc_event))
mindspore/profiler/analysis/viewer/ms_minddata_viewer.py
@@ -500,7 +500,7 @@ class BottleneckAnalyzer:
           in_op_id, out_q = self._get_non_inline_child_recur(op_id), self.queue_utilization_pct[op_id]
           # This is a leaf node since input queue does not exist and output queue exists
           if in_op_id == self.op_id_not_exist and out_q != self.queue_usage_not_exist:
-             if out_q < self._THRESHOLDS['_LEAF_OUTPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM']:
+             if out_q <= self._THRESHOLDS['_LEAF_OUTPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM']:
                   queue_usage_analysis.append(self._format_leaf_node_suggestion(op_id, out_q))
           # This is device_queue op
           elif self.op_names[op_id] == "DeviceQueue" and in_op_id != self.op_id_not_exist:
mindspore/profiler/common/constant.py
@@ -226,3 +226,8 @@ class HostSystem(Enum):
       DISK = "disk"
       NETWORK = "network"
       OSRT = "osrt"
+
+
+ class MsprofModeName:
+     """msprof mode name"""
+     MSPROF_DYNAMIC_ENV = "PROFILING_MODE"
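The new `MsprofModeName.MSPROF_DYNAMIC_ENV` constant names an environment variable. A hedged usage sketch (that the variable is read via `os.environ` is an assumption based only on the constant's value; it is not shown in this diff):

```python
import os

class MsprofModeName:
    """msprof mode name (as added above)."""
    MSPROF_DYNAMIC_ENV = "PROFILING_MODE"

# Assumed usage: consult the PROFILING_MODE environment variable.
mode = os.environ.get(MsprofModeName.MSPROF_DYNAMIC_ENV, "")
print(f"msprof dynamic profiling mode: {mode!r}")
```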