mindspore 2.7.0__cp310-cp310-win_amd64.whl → 2.7.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (290)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_extends/parse/compile_config.py +24 -1
  7. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -2
  8. mindspore/_extends/parse/resources.py +1 -1
  9. mindspore/_extends/parse/standard_method.py +8 -1
  10. mindspore/_extends/parse/trope.py +2 -1
  11. mindspore/_extends/pijit/pijit_func_white_list.py +7 -22
  12. mindspore/avcodec-59.dll +0 -0
  13. mindspore/avdevice-59.dll +0 -0
  14. mindspore/avfilter-8.dll +0 -0
  15. mindspore/avformat-59.dll +0 -0
  16. mindspore/avutil-57.dll +0 -0
  17. mindspore/boost/base.py +29 -2
  18. mindspore/common/_decorator.py +3 -2
  19. mindspore/common/_grad_function.py +3 -1
  20. mindspore/common/_tensor_cpp_method.py +1 -1
  21. mindspore/common/_tensor_docs.py +275 -64
  22. mindspore/common/_utils.py +0 -44
  23. mindspore/common/api.py +285 -35
  24. mindspore/common/dump.py +7 -108
  25. mindspore/common/dynamic_shape/auto_dynamic_shape.py +1 -3
  26. mindspore/common/hook_handle.py +60 -0
  27. mindspore/common/jit_config.py +5 -1
  28. mindspore/common/jit_trace.py +27 -12
  29. mindspore/common/lazy_inline.py +5 -3
  30. mindspore/common/parameter.py +13 -107
  31. mindspore/common/recompute.py +4 -11
  32. mindspore/common/tensor.py +16 -169
  33. mindspore/communication/_comm_helper.py +11 -1
  34. mindspore/communication/comm_func.py +138 -4
  35. mindspore/communication/management.py +85 -1
  36. mindspore/config/op_info.config +0 -15
  37. mindspore/context.py +5 -85
  38. mindspore/dataset/engine/datasets.py +8 -4
  39. mindspore/dataset/engine/datasets_vision.py +1 -1
  40. mindspore/dataset/engine/validators.py +1 -15
  41. mindspore/dnnl.dll +0 -0
  42. mindspore/{experimental/llm_boost/ascend_native → graph}/__init__.py +7 -7
  43. mindspore/graph/custom_pass.py +55 -0
  44. mindspore/include/dataset/execute.h +2 -2
  45. mindspore/jpeg62.dll +0 -0
  46. mindspore/mindrecord/__init__.py +3 -3
  47. mindspore/mindrecord/common/exceptions.py +1 -0
  48. mindspore/mindrecord/config.py +1 -1
  49. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  50. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  51. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  52. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  53. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  54. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  55. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  56. mindspore/mindrecord/filereader.py +4 -4
  57. mindspore/mindrecord/filewriter.py +5 -5
  58. mindspore/mindrecord/mindpage.py +2 -2
  59. mindspore/mindrecord/tools/cifar10.py +1 -1
  60. mindspore/mindrecord/tools/cifar100.py +1 -1
  61. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  62. mindspore/mindrecord/tools/cifar10_to_mr.py +1 -1
  63. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  64. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  65. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  66. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  67. mindspore/mindspore_backend_common.dll +0 -0
  68. mindspore/mindspore_backend_manager.dll +0 -0
  69. mindspore/mindspore_cluster.dll +0 -0
  70. mindspore/mindspore_common.dll +0 -0
  71. mindspore/mindspore_core.dll +0 -0
  72. mindspore/mindspore_cpu.dll +0 -0
  73. mindspore/mindspore_dump.dll +0 -0
  74. mindspore/mindspore_frontend.dll +0 -0
  75. mindspore/mindspore_glog.dll +0 -0
  76. mindspore/mindspore_hardware_abstract.dll +0 -0
  77. mindspore/mindspore_memory_pool.dll +0 -0
  78. mindspore/mindspore_ms_backend.dll +0 -0
  79. mindspore/mindspore_ops.dll +0 -0
  80. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  81. mindspore/mindspore_profiler.dll +0 -0
  82. mindspore/mindspore_pyboost.dll +0 -0
  83. mindspore/mindspore_pynative.dll +0 -0
  84. mindspore/mindspore_runtime_pipeline.dll +0 -0
  85. mindspore/mindspore_runtime_utils.dll +0 -0
  86. mindspore/mindspore_tools.dll +0 -0
  87. mindspore/mint/__init__.py +15 -10
  88. mindspore/mint/distributed/distributed.py +182 -62
  89. mindspore/mint/nn/__init__.py +2 -16
  90. mindspore/mint/nn/functional.py +4 -110
  91. mindspore/mint/nn/layer/__init__.py +0 -2
  92. mindspore/mint/nn/layer/activation.py +0 -6
  93. mindspore/mint/nn/layer/basic.py +0 -47
  94. mindspore/mint/nn/layer/conv.py +4 -4
  95. mindspore/mint/nn/layer/normalization.py +8 -13
  96. mindspore/mint/nn/layer/pooling.py +0 -4
  97. mindspore/nn/__init__.py +1 -3
  98. mindspore/nn/cell.py +16 -66
  99. mindspore/nn/layer/basic.py +49 -1
  100. mindspore/nn/layer/container.py +16 -0
  101. mindspore/nn/layer/embedding.py +4 -169
  102. mindspore/nn/layer/normalization.py +2 -1
  103. mindspore/nn/layer/thor_layer.py +4 -85
  104. mindspore/nn/optim/ada_grad.py +0 -1
  105. mindspore/nn/optim/adafactor.py +0 -1
  106. mindspore/nn/optim/adam.py +31 -124
  107. mindspore/nn/optim/adamax.py +0 -1
  108. mindspore/nn/optim/asgd.py +0 -1
  109. mindspore/nn/optim/ftrl.py +8 -102
  110. mindspore/nn/optim/lamb.py +0 -1
  111. mindspore/nn/optim/lars.py +0 -3
  112. mindspore/nn/optim/lazyadam.py +25 -218
  113. mindspore/nn/optim/momentum.py +5 -43
  114. mindspore/nn/optim/optimizer.py +6 -55
  115. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  116. mindspore/nn/optim/rmsprop.py +0 -1
  117. mindspore/nn/optim/rprop.py +0 -1
  118. mindspore/nn/optim/sgd.py +0 -1
  119. mindspore/nn/optim/tft_wrapper.py +0 -1
  120. mindspore/nn/optim/thor.py +0 -2
  121. mindspore/nn/probability/bijector/bijector.py +7 -8
  122. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  123. mindspore/nn/probability/bijector/power_transform.py +20 -21
  124. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  125. mindspore/nn/probability/bijector/softplus.py +13 -14
  126. mindspore/nn/wrap/grad_reducer.py +4 -74
  127. mindspore/numpy/array_creations.py +2 -2
  128. mindspore/numpy/fft.py +9 -9
  129. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  130. mindspore/onnx/onnx_export.py +137 -0
  131. mindspore/opencv_core4110.dll +0 -0
  132. mindspore/opencv_imgcodecs4110.dll +0 -0
  133. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  134. mindspore/ops/__init__.py +2 -0
  135. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  136. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  137. mindspore/ops/_op_impl/cpu/__init__.py +0 -5
  138. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +16 -22
  139. mindspore/ops/auto_generate/gen_extend_func.py +2 -7
  140. mindspore/ops/auto_generate/gen_ops_def.py +98 -141
  141. mindspore/ops/auto_generate/gen_ops_prim.py +12708 -12686
  142. mindspore/ops/communication.py +97 -0
  143. mindspore/ops/composite/__init__.py +5 -2
  144. mindspore/ops/composite/base.py +15 -1
  145. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  146. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  147. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  148. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  149. mindspore/ops/function/__init__.py +1 -0
  150. mindspore/ops/function/array_func.py +14 -12
  151. mindspore/ops/function/comm_func.py +3883 -0
  152. mindspore/ops/function/debug_func.py +3 -4
  153. mindspore/ops/function/math_func.py +45 -54
  154. mindspore/ops/function/nn_func.py +75 -294
  155. mindspore/ops/function/random_func.py +9 -18
  156. mindspore/ops/functional.py +2 -0
  157. mindspore/ops/functional_overload.py +354 -18
  158. mindspore/ops/operations/__init__.py +2 -5
  159. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  160. mindspore/ops/operations/_inner_ops.py +1 -38
  161. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  162. mindspore/ops/operations/array_ops.py +1 -0
  163. mindspore/ops/operations/comm_ops.py +94 -2
  164. mindspore/ops/operations/custom_ops.py +228 -19
  165. mindspore/ops/operations/debug_ops.py +27 -29
  166. mindspore/ops/operations/manually_defined/ops_def.py +27 -306
  167. mindspore/ops/operations/nn_ops.py +2 -2
  168. mindspore/ops/operations/sparse_ops.py +0 -83
  169. mindspore/ops/primitive.py +1 -17
  170. mindspore/ops/tensor_method.py +72 -3
  171. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  172. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  173. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  174. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  175. mindspore/ops_generate/common/gen_constants.py +11 -10
  176. mindspore/ops_generate/common/op_proto.py +18 -1
  177. mindspore/ops_generate/common/template.py +102 -245
  178. mindspore/ops_generate/common/template_utils.py +212 -0
  179. mindspore/ops_generate/gen_custom_ops.py +69 -0
  180. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  181. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  182. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  183. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  184. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  185. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  186. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  187. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  188. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  189. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  190. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  191. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  192. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  193. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  194. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  195. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  196. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  197. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  198. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  199. mindspore/parallel/_cell_wrapper.py +1 -1
  200. mindspore/parallel/_parallel_serialization.py +1 -4
  201. mindspore/parallel/_utils.py +29 -6
  202. mindspore/parallel/checkpoint_transform.py +18 -2
  203. mindspore/parallel/cluster/process_entity/_api.py +24 -32
  204. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  205. mindspore/{experimental/llm_boost/atb → parallel/distributed}/__init__.py +21 -23
  206. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  207. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  208. mindspore/parallel/strategy.py +336 -0
  209. mindspore/parallel/transform_safetensors.py +117 -16
  210. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +3 -0
  211. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  212. mindspore/profiler/common/constant.py +5 -0
  213. mindspore/profiler/common/file_manager.py +9 -0
  214. mindspore/profiler/common/msprof_cmd_tool.py +38 -2
  215. mindspore/profiler/common/path_manager.py +56 -24
  216. mindspore/profiler/common/profiler_context.py +2 -12
  217. mindspore/profiler/common/profiler_info.py +3 -3
  218. mindspore/profiler/common/profiler_path_manager.py +13 -0
  219. mindspore/profiler/common/util.py +30 -3
  220. mindspore/profiler/experimental_config.py +2 -1
  221. mindspore/profiler/platform/npu_profiler.py +33 -6
  222. mindspore/run_check/_check_version.py +108 -24
  223. mindspore/runtime/__init__.py +3 -2
  224. mindspore/runtime/executor.py +11 -3
  225. mindspore/runtime/memory.py +112 -0
  226. mindspore/swresample-4.dll +0 -0
  227. mindspore/swscale-6.dll +0 -0
  228. mindspore/tinyxml2.dll +0 -0
  229. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  230. mindspore/tools/data_dump.py +130 -0
  231. mindspore/tools/sdc_detect.py +91 -0
  232. mindspore/tools/stress_detect.py +63 -0
  233. mindspore/train/__init__.py +6 -6
  234. mindspore/train/_utils.py +5 -18
  235. mindspore/train/amp.py +6 -4
  236. mindspore/train/callback/_checkpoint.py +0 -9
  237. mindspore/train/callback/_train_fault_tolerance.py +69 -18
  238. mindspore/train/data_sink.py +1 -5
  239. mindspore/train/model.py +38 -211
  240. mindspore/train/serialization.py +126 -387
  241. mindspore/turbojpeg.dll +0 -0
  242. mindspore/utils/__init__.py +6 -3
  243. mindspore/utils/dlpack.py +92 -0
  244. mindspore/utils/dryrun.py +1 -1
  245. mindspore/utils/runtime_execution_order_check.py +10 -0
  246. mindspore/utils/sdc_detect.py +14 -12
  247. mindspore/utils/stress_detect.py +43 -0
  248. mindspore/utils/utils.py +144 -8
  249. mindspore/version.py +1 -1
  250. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  251. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/RECORD +254 -267
  252. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -210
  253. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  254. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  255. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  256. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  257. mindspore/experimental/llm_boost/register.py +0 -130
  258. mindspore/experimental/llm_boost/utils.py +0 -31
  259. mindspore/include/OWNERS +0 -7
  260. mindspore/mindspore_cpu_res_manager.dll +0 -0
  261. mindspore/mindspore_ops_kernel_common.dll +0 -0
  262. mindspore/mindspore_res_manager.dll +0 -0
  263. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  264. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  265. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  266. mindspore/nn/reinforcement/tensor_array.py +0 -145
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  270. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  271. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  272. mindspore/ops/_op_impl/cpu/buffer_append.py +0 -28
  273. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  274. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  275. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  276. mindspore/ops/operations/_tensor_array.py +0 -359
  277. mindspore/ops/operations/rl_ops.py +0 -288
  278. mindspore/parallel/_offload_context.py +0 -275
  279. mindspore/parallel/_recovery_context.py +0 -115
  280. mindspore/parallel/_transformer/__init__.py +0 -35
  281. mindspore/parallel/_transformer/layers.py +0 -765
  282. mindspore/parallel/_transformer/loss.py +0 -251
  283. mindspore/parallel/_transformer/moe.py +0 -693
  284. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  285. mindspore/parallel/_transformer/transformer.py +0 -3124
  286. mindspore/parallel/mpi/_mpi_config.py +0 -116
  287. mindspore/train/memory_profiling_pb2.py +0 -298
  288. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  289. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  290. {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
  # ============================================================================
15
15
  """
16
- Generates mindspore/ccsrc/pybind_api/ir/tensor_py.cc which includes the CPython Tensor APIs.
16
+ Generates mindspore/ccsrc/pybind_api/ir/tensor/tensor_py.cc which includes the CPython Tensor APIs.
17
17
  """
18
18
 
19
19
  import os
@@ -26,7 +26,7 @@ from pyboost import pyboost_utils
26
26
 
27
27
  class TensorPyCppGenerator(BaseGenerator):
28
28
  """
29
- This class is responsible for generating mindspore/ccsrc/pybind_api/ir/tensor_register/
29
+ This class is responsible for generating mindspore/ccsrc/pybind_api/ir/tensor/tensor_register/
30
30
  auto_generate/tensor_py_gen.cc
31
31
  """
32
32
  def __init__(self):
@@ -263,7 +263,7 @@ def _single_parameter_broadcast(net, layout, param_not_load=None, param_loaded=N
263
263
  if not single_params:
264
264
  return
265
265
  param_redundancy_reversed = _get_param_redundancy_reversed(param_redundancy, cur_rank)
266
- if not param_redundancy_reversed or cur_rank not in single_params:
266
+ if not param_redundancy_reversed:
267
267
  return
268
268
  net_param_dict = net.parameters_dict()
269
269
  _chang_parallel_context(origin_dataset_strategy)
@@ -526,10 +526,7 @@ def _make_dir(path, arg_name):
526
526
  else:
527
527
  ms.log.debug("The directory(%s) doesn't exist, will create it", path)
528
528
  try:
529
- permissions = os.R_OK | os.W_OK | os.X_OK
530
- os.umask(permissions << 3 | permissions)
531
- mode = permissions << 6
532
- os.makedirs(path, mode=mode, exist_ok=True)
529
+ os.makedirs(path, mode=0o700, exist_ok=True)
533
530
  real_path = path
534
531
  except PermissionError as e:
535
532
  ms.log.critical("No write permission on the directory(%r), error = %r", path, e)
@@ -14,12 +14,13 @@
14
14
  # ============================================================================
15
15
  """Utils of auto parallel"""
16
16
  import os
17
+ import re
17
18
  from time import perf_counter
18
19
  from importlib import import_module
19
20
  import numpy as np
20
21
  import mindspore as ms
21
22
  from mindspore import context, log as logger
22
- from mindspore._c_expression import reset_op_id, reset_op_id_with_offset
23
+ from mindspore._c_expression import reset_op_id
23
24
  from mindspore.common.tensor import Tensor
24
25
  from mindspore.common.dtype import _dtype_to_nptype
25
26
  from mindspore.common import dtype as mstype
@@ -584,11 +585,6 @@ def _reset_op_id():
584
585
  reset_op_id()
585
586
 
586
587
 
587
- def _reset_op_id_with_offset():
588
- """Reset op id with offset."""
589
- reset_op_id_with_offset()
590
-
591
-
592
588
  def _parallel_predict_check():
593
589
  """validate parallel model prediction"""
594
590
  if _is_in_auto_parallel_mode():
@@ -798,3 +794,30 @@ def _check_rank(cur_rank, initial_rank, pipeline_stages):
798
794
  raise ValueError(f"For parameter broadcast, the cur_rank: {cur_rank} is wrong.")
799
795
  if initial_rank % (get_group_size() / pipeline_stages) != 0:
800
796
  raise ValueError(f"For parameter broadcast, the initial_rank: {initial_rank} is wrong.")
797
+
798
+
799
+ def _check_path_safe(path, arg_name):
800
+ """
801
+ Check input path string is safe.
802
+ """
803
+ illegal_patterns = [
804
+ r"\.\.",
805
+ r"//+",
806
+ r"~",
807
+ r"^\s*$",
808
+ r"\./\."
809
+ ]
810
+ for pattern in illegal_patterns:
811
+ if re.search(pattern, path):
812
+ pattern_info = pattern.replace('\\', '')
813
+ raise ValueError(f"{arg_name} contains '{pattern_info}' is not safe, please use a safe one.")
814
+
815
+
816
+ def _check_path_writable(path):
817
+ """
818
+ Check the write permission of the input path.
819
+ """
820
+ if not os.path.exists(path):
821
+ raise RuntimeError(f"{path} Path does not exist.")
822
+ if not os.access(path, os.W_OK):
823
+ raise PermissionError(f"Don't have the write permission on the directory {path}.")
@@ -31,7 +31,7 @@ from mindspore.communication.management import get_rank, get_group_size
31
31
  from mindspore.parallel._tensor import _load_tensor, _reshape_param_data, _reshape_param_data_with_weight, \
32
32
  _get_tensor_slice_index, _get_tensor_strategy
33
33
  from mindspore.parallel._utils import _is_in_auto_parallel_mode, _get_pipeline_stages, _infer_rank_list, \
34
- _remove_repeated_slices, _get_auto_parallel_net
34
+ _remove_repeated_slices, _get_auto_parallel_net, _check_path_safe, _check_path_writable
35
35
  from mindspore.parallel._parallel_serialization import _rank_list_for_transform_parallel_checkpoint, \
36
36
  _transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir, _build_searched_strategy, \
37
37
  _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
@@ -69,7 +69,9 @@ def merge_pipeline_strategys(src_strategy_dirs, dst_strategy_file):
69
69
  >>> ms.parallel.merge_pipeline_strategys("./src_strategy_dir", "./dst_strategy.ckpt")
70
70
 
71
71
  """
72
- dst_strategy_dir, _ = os.path.split(dst_strategy_file)
72
+ dst_strategy_file = os.path.normpath(dst_strategy_file)
73
+ dst_strategy_file = os.path.abspath(dst_strategy_file)
74
+ dst_strategy_dir = os.path.dirname(dst_strategy_file)
73
75
  if not os.path.exists(dst_strategy_dir):
74
76
  _make_dir(dst_strategy_dir, "path")
75
77
  if not os.path.isdir(src_strategy_dirs):
@@ -495,6 +497,9 @@ def _transform_checkpoint_by_stage(src_checkpoints_dir, dst_checkpoints_dir, ckp
495
497
  def _transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix, src_strategy_file=None,
496
498
  dst_strategy_file=None):
497
499
  """Transform checkpoints for all stages in src_strategy_file"""
500
+ _check_path_safe(dst_checkpoints_dir, "dst_checkpoints_dir")
501
+ dst_checkpoints_dir = os.path.realpath(dst_checkpoints_dir)
502
+ _check_path_safe(ckpt_prefix, "ckpt_prefix")
498
503
  checkpoints_rank_dir_list = os.path.join(src_checkpoints_dir, "rank_[0-9]*")
499
504
  all_checkpoint_files_map = {}
500
505
  for checkpoint_dir in glob.glob(checkpoints_rank_dir_list):
@@ -563,6 +568,7 @@ def _transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix
563
568
  save_checkpoint_file_dir = os.path.join(dst_checkpoints_dir, "rank_{}".format(transform_rank))
564
569
  if not os.path.exists(save_checkpoint_file_dir):
565
570
  _make_dir(save_checkpoint_file_dir, "path")
571
+ _check_path_writable(save_checkpoint_file_dir)
566
572
  save_checkpoint_file_name = os.path.join(save_checkpoint_file_dir, save_checkpoint_file)
567
573
  ms.save_checkpoint(transform_param_list, save_checkpoint_file_name)
568
574
  del param_total_dict_copy
@@ -913,6 +919,15 @@ def set_op_strategy_config(mode="SAVE", path=""):
913
919
  if file_type != ".json":
914
920
  raise KeyError("File type must be .json")
915
921
  dir_path = os.path.dirname(path)
922
+
923
+ normalized_path = os.path.abspath(os.path.realpath(path))
924
+ dangerous_paths = ['/etc', '/usr', '/bin', '/sbin', '/boot', '/proc', '/sys']
925
+ for dangerous_path in dangerous_paths:
926
+ if normalized_path.startswith(dangerous_path):
927
+ raise PermissionError(
928
+ f"Writing to system directory '{dangerous_path}' is not allowed"
929
+ )
930
+
916
931
  if dir_path and not os.path.exists(dir_path):
917
932
  os.makedirs(dir_path, mode=0o700, exist_ok=True)
918
933
  check_mode_type = ["SAVE", "LOAD"]
@@ -1182,6 +1197,7 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
1182
1197
 
1183
1198
  param_total_dict = defaultdict(dict)
1184
1199
  for file_index, file_name in enumerate(checkpoint_filenames):
1200
+ file_name = os.path.abspath(file_name)
1185
1201
  ckpt_dict = ms.load_checkpoint(file_name, dec_key=dec_key, dec_mode=dec_mode)
1186
1202
  for param_name, param in ckpt_dict.items():
1187
1203
  param_total_dict[param_name][file_index] = param
@@ -21,6 +21,7 @@ import subprocess
21
21
  import socket
22
22
  import psutil
23
23
  import mindspore.log as logger
24
+ from mindspore.utils import RSCPluginHandle
24
25
  from ._utils import _generate_cmd_args_list, _generate_cmd_args_list_with_core, _generate_url, \
25
26
  _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip, _generate_auto_bind_core_strategy, \
26
27
  _generate_bind_core_strategy
@@ -221,23 +222,28 @@ class _ProcessManager:
221
222
 
222
223
  self.proc_rank_map = {}
223
224
  self.enable_mindx = False
225
+ self.handler = None
224
226
  self._check_taskd()
225
227
 
226
228
  def _check_taskd(self):
227
229
  """check if enable taskd."""
228
- tft_env = os.getenv("MS_ENABLE_TFT", "")
229
- if any(v in tft_env for v in ('TTP:1', 'UCE:1', 'ARF:1', 'TSP:1', 'RSC:1', 'HCCE:1')):
230
- try:
231
- from taskd.python.framework.agent.ms_mgr.msrun_plugin import MSRunPlugin
232
- self.msmgr = MSRunPlugin()
233
- self.msmgr.register_callbacks("KILL_WORKER", self.kill_workers)
234
- self.msmgr.register_callbacks("START_ALL_WORKER", self.start_all_workers)
235
- self.msmgr.register_callbacks("START_WORKER_LIST", self.start_worker_list)
236
- self.msmgr.register_callbacks("MONITOR", self.monitor_rank_status)
237
- self.enable_mindx = True
238
- os.environ["MS_ENABLE_RECOVERY"] = str(1)
239
- except Exception as e: # pylint: disable=broad-except
240
- logger.warning(f"mindx is not installed, using original mindspore recovery strategy.: {str(e)}")
230
+ self.handler = RSCPluginHandle()
231
+ self.enable_mindx = self.handler.check_enable()
232
+ if self.enable_mindx is False:
233
+ self.handler = None
234
+ return
235
+ ret = self.handler.register_callback({"KILL_WORKER": self.kill_workers,
236
+ "START_ALL_WORKER": self.start_all_workers,
237
+ "START_WORKER_LIST": self.start_worker_list,
238
+ "MONITOR": self.monitor_rank_status
239
+ })
240
+ if not ret:
241
+ logger.warning(f"Register callback to mindx failed, process controlled by msrun.")
242
+ self.enable_mindx = False
243
+ self.handler = None
244
+ return
245
+ logger.warning(f"Mindx enabled, process controlled by mindx.")
246
+ os.environ["MS_ENABLE_RECOVERY"] = str(1)
241
247
 
242
248
  def run(self):
243
249
  """
@@ -260,7 +266,7 @@ class _ProcessManager:
260
266
  if self.is_master and not self.is_simulation:
261
267
  self.start_scheduler()
262
268
  if self.enable_mindx:
263
- self.msmgr.start()
269
+ self.handler.start()
264
270
  else:
265
271
  self.start_workers()
266
272
  if self.join:
@@ -382,8 +388,7 @@ class _ProcessManager:
382
388
  logger.error(f"Scheduler process {self.msn_process.pid} exit with exception.")
383
389
 
384
390
  if has_exception:
385
- logger.info("Analyzing exception log...")
386
- self._analyze_log()
391
+ self._analyze_sched_log()
387
392
  raise RuntimeError("Distributed job exited with exception. Please check logs in "
388
393
  f"directory: {self.log_dir}.")
389
394
 
@@ -583,26 +588,13 @@ class _ProcessManager:
583
588
  log_name = os.path.join(self.log_dir, formatted_log_name + "_" + str(index) + ".log")
584
589
  return node_id, log_name
585
590
 
586
- def _analyze_log(self):
591
+ def _analyze_sched_log(self):
587
592
  """
588
- Analyze exception logs.
593
+ Analyze scheduler log.
589
594
  """
590
595
  scheduler_log_path = os.path.join(self.log_dir, "scheduler.log")
591
- time_out_node_ids = []
592
596
  if os.path.exists(scheduler_log_path):
593
- with open(scheduler_log_path, "r") as log:
594
- scheduler_log = log.read()
595
- # Filter out abnormal logs.
596
- time_out_node_log = re.findall(r"node: .* is timed out", scheduler_log)
597
-
598
- # Filter out node ids of the processes which exit abnormally.
599
- def node_id_splitter(node_id):
600
- return re.split(" is timed out", re.split("node: ", node_id)[1])[0]
601
- for node_id in time_out_node_log:
602
- time_out_node_ids.append(node_id_splitter(node_id))
603
- logger.error(f"Time out nodes are {time_out_node_ids}")
604
-
605
- os.system(f"grep -rn -E 'ERROR|CRITICAL|Traceback|Error' -C 5 {self.log_dir}")
597
+ os.system(f"cat {scheduler_log_path} | grep -E 'ERROR|CRITICAL|Traceback|Error' -C 5")
606
598
 
607
599
  def format_worker_log_name(self):
608
600
  """
@@ -30,7 +30,7 @@ def _generate_cmd(cmd, cmd_args, output_name):
30
30
 
31
31
  """
32
32
  if cmd not in ['python', 'pytest', 'python3']:
33
- # If user don't set binary file name, defaulty use 'python' to launch the job.
33
+ # If user don't set binary file name, defaultly use 'python' to launch the job.
34
34
  command = f"python {cmd} {' '.join(cmd_args)} > {output_name} 2>&1 &"
35
35
  else:
36
36
  command = f"{cmd} {' '.join(cmd_args)} > {output_name} 2>&1 &"
@@ -42,7 +42,7 @@ def _generate_cmd_args_list(cmd, cmd_args):
42
42
  Generates arguments list for 'Popen'. It consists of a binary file name and subsequential arguments.
43
43
  """
44
44
  if cmd not in ['python', 'pytest', 'python3']:
45
- # If user don't set binary file name, defaulty use 'python' to launch the job.
45
+ # If user don't set binary file name, defaultly use 'python' to launch the job.
46
46
  return ['python'] + [cmd] + cmd_args
47
47
  return [cmd] + cmd_args
48
48
 
@@ -55,7 +55,7 @@ def _generate_cmd_args_list_with_core(cmd, cmd_args, affinity_cpu_str):
55
55
  taskset_args = ['taskset'] + ['-c'] + [affinity_cpu_str]
56
56
  final_cmd = []
57
57
  if cmd not in ['python', 'pytest', 'python3']:
58
- # If user don't set binary file name, defaulty use 'python' to launch the job.
58
+ # If user don't set binary file name, defaultly use 'python' to launch the job.
59
59
  final_cmd = taskset_args + ['python'] + [cmd] + cmd_args
60
60
  else:
61
61
  final_cmd = taskset_args + [cmd] + cmd_args
@@ -143,8 +143,14 @@ def _parse_global_device_to_cpu_map(local_rank_id, physical_device_id, device_to
143
143
  Parse the global device_to_cpu_map and return a cpu list for assigned local_rank_id.
144
144
 
145
145
  """
146
+ if local_rank_id >= len(list(device_to_cpu_map.keys())):
147
+ logger.warning(f"Cannot find process[{local_rank_id}] in args '--bind_core'. "
148
+ "Will not launch process with taskset.")
149
+ return ""
146
150
  input_device_id = int(list(device_to_cpu_map.keys())[local_rank_id].replace("device", ""))
147
151
  if physical_device_id != input_device_id:
152
+ logger.warning(f"Cannot find physical_device_id[{physical_device_id}] for process[{local_rank_id}] "
153
+ "in args '--bind_core'. Will not launch process with taskset.")
148
154
  return ""
149
155
  affinity_cpu_list = list(device_to_cpu_map.values())[local_rank_id]
150
156
  affinity_cpu_str = ",".join(affinity_cpu_list)
@@ -212,8 +218,6 @@ def _generate_bind_core_strategy(local_rank_id, device_to_cpu_map, arg_bind_core
212
218
  if isinstance(arg_bind_core, dict):
213
219
  affinity_cpu_str = _parse_global_device_to_cpu_map(local_rank_id, physical_device_id, arg_bind_core)
214
220
  if not affinity_cpu_str:
215
- logger.warning(f"Failed to find physical_device_id[{physical_device_id}] for "
216
- f"process[{local_rank_id}]. Will not launch process with taskset.")
217
221
  return None
218
222
  elif arg_bind_core is True:
219
223
  cpu_list_for_device = device_to_cpu_map.get(physical_device_id, [])
@@ -1,23 +1,21 @@
1
- # Copyright 2024 Huawei Technologies Co., Ltd
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ============================================================================
15
- """
16
- Provide llm boost for inference, such as LlamaBoost.
17
- """
18
- from __future__ import absolute_import
19
-
20
- from mindspore.experimental.llm_boost.atb.llama_boost import LlamaBoost
21
- from mindspore.experimental.llm_boost.atb.qwen_boost import QwenBoost
22
-
23
- __all__ = ['LlamaBoost', 'QwenBoost']
1
+ # Copyright 2025 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+
16
+ """distributed init"""
17
+ from mindspore.parallel.distributed.distributed_data_parallel import DistributedDataParallel
18
+
19
+ __all__ = [
20
+ "DistributedDataParallel",
21
+ ]