mindspore 2.4.0-cp310-none-any.whl → 2.4.1-cp310-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (114)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_c_dataengine.cpython-310-aarch64-linux-gnu.so +0 -0
  3. mindspore/_c_expression.cpython-310-aarch64-linux-gnu.so +0 -0
  4. mindspore/bin/cache_admin +0 -0
  5. mindspore/bin/cache_server +0 -0
  6. mindspore/common/initializer.py +51 -15
  7. mindspore/common/parameter.py +18 -4
  8. mindspore/common/tensor.py +15 -49
  9. mindspore/communication/comm_func.py +7 -7
  10. mindspore/context.py +9 -0
  11. mindspore/include/mindapi/base/format.h +13 -0
  12. mindspore/lib/libdnnl.so.2 +0 -0
  13. mindspore/lib/libmindspore_backend.so +0 -0
  14. mindspore/lib/libmindspore_common.so +0 -0
  15. mindspore/lib/libmindspore_core.so +0 -0
  16. mindspore/lib/libmindspore_glog.so.0 +0 -0
  17. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  18. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  19. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  20. mindspore/lib/libmindspore_ops.so +0 -0
  21. mindspore/lib/libopencv_core.so.4.5 +0 -0
  22. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +10 -10
  23. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +8 -8
  24. mindspore/lib/plugin/ascend/custom_compiler/setup.py +1 -1
  25. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  26. mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
  27. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  28. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h +5 -5
  29. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  30. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  31. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal.so +0 -0
  32. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
  33. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +1 -0
  34. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h +6 -1
  35. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/rms_norm_op.h +4 -3
  36. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
  37. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
  38. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layer_norm_impl.so +0 -0
  39. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
  40. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
  41. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_impl.so +0 -0
  42. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_old_impl.so +0 -0
  43. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
  44. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_old_impl.so +0 -0
  45. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
  46. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
  47. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
  48. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  49. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_impl.so +0 -0
  50. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
  51. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
  52. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_impl.so +0 -0
  53. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_old_impl.so +0 -0
  54. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
  55. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  56. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  57. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  58. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  59. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  60. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  61. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  62. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_bf16.o +0 -0
  63. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp16.o +0 -0
  64. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp32.o +0 -0
  65. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_bf16.o +0 -0
  66. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp16.o +0 -0
  67. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp32.o +0 -0
  68. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bnsd_mix.o +0 -0
  69. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
  70. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
  71. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
  72. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
  73. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
  74. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  75. mindspore/mint/__init__.py +490 -2
  76. mindspore/mint/nn/__init__.py +2 -2
  77. mindspore/mint/optim/adamw.py +6 -14
  78. mindspore/nn/cell.py +1 -3
  79. mindspore/nn/layer/basic.py +24 -7
  80. mindspore/nn/layer/embedding.py +31 -14
  81. mindspore/nn/optim/tft_wrapper.py +12 -15
  82. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  83. mindspore/ops/_grad_experimental/grad_comm_ops.py +20 -1
  84. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +6 -0
  85. mindspore/ops/auto_generate/gen_extend_func.py +33 -0
  86. mindspore/ops/auto_generate/gen_ops_def.py +52 -3
  87. mindspore/ops/auto_generate/gen_ops_prim.py +155 -6
  88. mindspore/ops/function/array_func.py +2 -0
  89. mindspore/ops/function/math_func.py +7 -1
  90. mindspore/ops/function/random_func.py +221 -7
  91. mindspore/ops/operations/__init__.py +1 -1
  92. mindspore/ops/operations/array_ops.py +3 -1
  93. mindspore/ops/operations/comm_ops.py +21 -0
  94. mindspore/ops/operations/manually_defined/ops_def.py +8 -10
  95. mindspore/parallel/_auto_parallel_context.py +3 -1
  96. mindspore/parallel/_cell_wrapper.py +2 -0
  97. mindspore/parallel/_tensor.py +46 -2
  98. mindspore/parallel/_utils.py +40 -21
  99. mindspore/parallel/transform_safetensors.py +196 -43
  100. mindspore/profiler/profiling.py +5 -1
  101. mindspore/run_check/_check_version.py +4 -2
  102. mindspore/train/_utils.py +92 -32
  103. mindspore/train/callback/_checkpoint.py +12 -9
  104. mindspore/train/callback/_on_request_exit.py +12 -1
  105. mindspore/train/callback/_tft_register.py +27 -4
  106. mindspore/train/dataset_helper.py +10 -2
  107. mindspore/train/model.py +20 -0
  108. mindspore/train/serialization.py +8 -18
  109. mindspore/version.py +1 -1
  110. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +8 -6
  111. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +114 -114
  112. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +0 -0
  113. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
  114. {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/parallel/_tensor.py

@@ -590,6 +590,8 @@ def _apply_operator(operator_name):
     Returns:
         The data of tensor after apply operator.
     """
+    if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
+        numpy_data = numpy_data[:]
     if not isinstance(numpy_data, np.ndarray):
         raise TypeError("The data should be a numpy.ndarray.")
     _check_operator(reshape_op)
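
Note: the guard above materializes safetensors' lazy slice objects with [:] before the ndarray check. A minimal standalone sketch of that behaviour, assuming a local file model.safetensors containing a tensor named "weight" (both names are placeholders):

    import numpy as np
    from safetensors import safe_open

    with safe_open("model.safetensors", framework="np") as f:  # placeholder file name
        lazy = f.get_slice("weight")   # lazy PySafeSlice object, no data read yet
        print(lazy.get_shape())        # shape metadata without loading the tensor
        arr = lazy[:]                  # slicing materializes it as a numpy array
    assert isinstance(arr, np.ndarray)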
@@ -629,8 +631,6 @@ def _apply_operator(operator_name):
     Returns:
         The data of tensor after apply operator.
     """
-    if not isinstance(numpy_data, np.ndarray):
-        raise TypeError("The data should be a numpy.ndarray.")
     _check_operator(slice_op)
     if len(slice_op[1]) % 3 != 0:
         raise ValueError("The slice operator information is wrong.")
@@ -701,6 +701,50 @@ def _load_tensor_shape(dev_mat, tensor_map, full_shape=None, rank_id=-1):
     return tuple(res)
 
 
+def _count_tensor_shape(dev_mat, tensor_map, full_shape=None, rank_id=-1):
+    """get tensor shape"""
+    if rank_id == -1:
+        rank = get_rank()
+    else:
+        rank = rank_id
+    tensor_strategy = _get_tensor_strategy(dev_mat, tensor_map)
+    tensor_slice_index = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank)
+    np_tensor_list = _chunk_shape_by_strategy(full_shape, tensor_strategy)
+    np_tensor_slice_index = np_tensor_list[int(tensor_slice_index)]
+    res = []
+    for index in np_tensor_slice_index:
+        res.append(index[1] - index[0])
+    return res
+
+
+def _load_tensor_shape_by_layout(tensor, layout, rank_id):
+    """get tensor shape by layout"""
+    if not isinstance(layout, tuple):
+        raise TypeError("The layout should be tuple! layout is {}".format(layout))
+    if len(layout) < 7:
+        raise ValueError("The length of layout must be larger than 6! layout is {}".format(layout))
+    slice_shape = layout[2]
+    if slice_shape:
+        return slice_shape
+    tensor_map = layout[1]
+    if not tensor_map:
+        return tensor.shape
+    dev_mat = layout[0]
+    uniform_split = layout[4]
+    group = layout[5]
+    full_shape = layout[6]
+    if not full_shape:
+        full_shape = tensor.shape
+    if uniform_split == 0:
+        raise RuntimeError("The load tensor only support uniform split now")
+    tensor_slice_shape = _count_tensor_shape(dev_mat, tensor_map, full_shape, rank_id)
+    if group:
+        # get a totally shard tensor slice for parallel optimizer
+        size = get_group_size(group)
+        tensor_slice_shape[0] //= size
+    return tensor_slice_shape
+
+
 def _chunk_shape_by_strategy(full_shape, strategy):
     """chunk shape by strategy"""
     shape = []
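
Note: the new _load_tensor_shape_by_layout reads the layout tuple positionally as (dev_matrix, tensor_map, slice_shape, _, uniform_split, opt_shard_group, full_shape). The sketch below mirrors the shape arithmetic it appears to perform; treating a tensor_map entry as an index into the device matrix counted from the right is an assumption about MindSpore's layout convention, and sketch_slice_shape is a hypothetical helper, not part of the package:

    def sketch_slice_shape(dev_mat, tensor_map, full_shape):
        """Divide each dimension of full_shape by the number of shards assigned to it."""
        shards = []
        for map_idx in tensor_map:
            # -1 means the dimension is replicated rather than sharded.
            shards.append(1 if map_idx == -1 else dev_mat[len(dev_mat) - 1 - map_idx])
        return [dim // n for dim, n in zip(full_shape, shards)]

    # Example: device matrix (2, 4) and tensor_map (1, -1): dim 0 is split across the
    # size-2 axis of the device matrix, dim 1 is replicated.
    print(sketch_slice_shape([2, 4], [1, -1], [16, 8]))  # [8, 8]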
mindspore/parallel/_utils.py

@@ -14,6 +14,7 @@
 # ============================================================================
 """Utils of auto parallel"""
 import os
+from time import perf_counter
 from importlib import import_module
 import numpy as np
 import mindspore as ms
@@ -27,7 +28,7 @@ from mindspore.communication._comm_helper import _is_initialized
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.common.seed import get_seed
 from mindspore._c_expression import GraphExecutor_
-from mindspore.parallel._tensor import _load_tensor_by_layout
+from mindspore.parallel._tensor import _load_tensor_by_layout, _load_tensor_shape_by_layout
 
 SUPPORTED_TUPLE_IN_TUPLE_STRATEGY = ["GroupedMatmul", "FusedInferAttentionScore", "Custom"]
 
@@ -104,31 +105,49 @@ def _need_to_full():
     return not _get_full_batch()
 
 
+class ParallelParamInitProfCtx:
+    """Collect parallel param initialization performance context mgr."""
+
+    def __init__(self, parameter, func_name):
+        self.parameter = parameter
+        self.func_name = func_name
+        self.start_timestamp = None
+
+    def __enter__(self):
+        self.start_timestamp = perf_counter()
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        end_timestamp = perf_counter()
+        duration = end_timestamp - self.start_timestamp
+        if os.getenv("MS_DEV_PARAM_INIT_PROF_COLLECT"):
+            logger.warning(f"{self.func_name}: {self.parameter.name}, shape: {self.parameter.shape}, "
+                           f"sliced: {self.parameter.sliced}, duration: {duration}")
+
+
 def _slice_parameter(parameter, phase, layout):
     """Slice python parameter obj according to the layout."""
-    is_train_phase = phase.startswith('train')
-    is_prefill_phase = phase.startswith('prefill')
-    if layout is not None and parameter.from_ckpt and not is_train_phase:
-        is_opt_shard_group = layout[5]
-        if not parameter.sliced and is_prefill_phase and is_opt_shard_group:
+    # graph_executor.updata_param_node_default_input(phase, {parameter.name: parameter})
+    if getattr(parameter, "init_param", False):
+        if layout is None:
+            parameter.sliced = True
+            return
+        if not parameter.sliced:
+            rank = get_rank()
+            new_tensor_shape = _load_tensor_shape_by_layout(parameter, layout, rank)
+            parameter.shape = new_tensor_shape
+    else:
+        graph_executor = GraphExecutor_.get_instance()
+        new_param = parameter.init_data(layout, set_sliced=True)
+        parameter = new_param
+        graph_executor.updata_param_node_default_input(phase, {parameter.name: parameter})
+        if layout is None:
+            parameter.sliced = True
+            return
+        if not parameter.sliced:
             rank = get_rank()
             new_tensor = _load_tensor_by_layout(parameter, layout, rank)
             parameter.set_data(new_tensor, True)
-            return
-        layout_shape = layout[2]
-        parameter.shape = tuple(layout_shape)
-        return
-    graph_executor = GraphExecutor_.get_instance()
-    new_param = parameter.init_data(layout, set_sliced=True)
-    parameter = new_param
-    graph_executor.updata_param_node_default_input(phase, {parameter.name: parameter})
-    if layout is None:
-        parameter.sliced = True
-        return
-    if not parameter.sliced:
-        rank = get_rank()
-        new_tensor = _load_tensor_by_layout(parameter, layout, rank)
-        parameter.set_data(new_tensor, True)
 
 
 def _slice_tensor(tensor, layout, rank_id):
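
Note: ParallelParamInitProfCtx only logs when the MS_DEV_PARAM_INIT_PROF_COLLECT environment variable is set. A self-contained sketch of the same timing pattern, with hypothetical names and a plain print in place of MindSpore's logger:

    import os
    from time import perf_counter

    class InitProfCtx:
        """Time a block and report it only when a debug env var is set (illustration only)."""

        def __init__(self, label):
            self.label = label
            self.start = None

        def __enter__(self):
            self.start = perf_counter()
            return self

        def __exit__(self, exc_type, exc_value, exc_traceback):
            if os.getenv("MS_DEV_PARAM_INIT_PROF_COLLECT"):
                print(f"{self.label}: {perf_counter() - self.start:.6f}s")

    with InitProfCtx("slice parameter: net.embedding.weight"):  # hypothetical label
        pass  # e.g. the body of _slice_parameter for one parameter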
mindspore/parallel/transform_safetensors.py

@@ -32,7 +32,7 @@ from mindspore.parallel._parallel_serialization import _get_device_num_from_stra
 from mindspore.parallel._tensor import _get_tensor_strategy, _construct_from_to_tensor_layout, \
     _get_needed_rank_transform_operator_map_by_layouts, \
     _generate_transform_operator_stack, _apply_tensor_transform_operators, _construct_tensor_layout_for_opt_shard, \
-    _extract_layout_item, _load_tensor_shape
+    _extract_layout_item, _load_tensor_shape, _apply_operator
 from mindspore.parallel._parallel_serialization import _build_searched_strategy, _load_protobuf_strategy, \
     _convert_to_list
 
@@ -375,12 +375,10 @@ def _transform_stage_safetensors(src_strategy_dict, dst_strategy_dict, ckpt_pref
         if int(needed_rank) not in all_safetensor_files_map:
             raise ValueError("The safetensor file of rank{} is needed for converting rank{}'s safetensor, "
                              "but it is missing.".format(needed_rank, rank))
-    if process_num > len(needed_rank_list_map):
+    dst_stage_num = _extract_pipeline_stage_num(dst_strategy_dict)
+    if not (len(needed_rank_list_map) == 1 and dst_stage_num > 1) and process_num > len(needed_rank_list_map):
         ms.log.warning("The value of process_num cannot be greater than that of needed_rank_list_map.")
         process_num = len(needed_rank_list_map)
-    dst_stage_num = _extract_pipeline_stage_num(dst_strategy_dict)
-    if len(needed_rank_list_map) == 1 and dst_stage_num > 1:
-        process_num = dst_stage_num
     _transform_safetensors_with_parallel(needed_rank_list_map, all_safetensor_files_map, src_stage_device_num,
                                          dst_stage_device_num, src_strategy_dict, dst_strategy_dict,
                                          origin_src_strategy_list, origin_dst_strategy_list, ckpt_prefix,
@@ -452,18 +450,18 @@ def _transform_safetensors_with_parallel(needed_rank_list_map, all_safetensor_fi
     """
     Transforms safetensors files to a specified format using parallel processing.
     """
-    part_list_dict = _distribute_files_by_size(all_safetensor_files_map, needed_rank_list_map, process_num)
-
     # cal param name for every pipeline, save in pipe_param_list.
     pipe_num = _extract_pipeline_stage_num(dst_strategy_dict)
     pipe_param_list = [None for _ in range(max(pipe_num, process_num))]
     if len(needed_rank_list_map) == 1 and pipe_num > 1:
+        process_num = pipe_num
         pipe_param_list = [[] for _ in range(pipe_num)]
         layout_map = _convert_to_list(dst_strategy_dict)
 
         for name, layout in layout_map.items():
            pipe_param_list[layout[6][0]].append(name)
 
+    part_list_dict = _distribute_files_by_size(all_safetensor_files_map, needed_rank_list_map, process_num)
     processes = []
     for i in range(process_num):
         p = mp.Process(target=_transform_safetensors_single, args=(
@@ -476,15 +474,74 @@ def _transform_safetensors_with_parallel(needed_rank_list_map, all_safetensor_fi
         p.join()
 
 
+def _count_redundancy_list(rank_num, param_name, redundancy_dict, device_num):
+    """Obtain the specified redundant group."""
+    redundancy_tuple = redundancy_dict.get(param_name)
+    for rank_list in redundancy_tuple:
+        for rank in rank_list:
+            if rank_num % device_num == rank % device_num:
+                return set(rank_list)
+    return set()
+
+
+def _find_remove_redundancy_rank_id(pipe_param_list, single_param_dict, file_dict, saftensor_dict, redundancy_dict,
+                                    needed_rank, device_num):
+    """Find the rank_id under redundant groups."""
+    for param_name in pipe_param_list:
+        rank_num = int(needed_rank)
+        redundancy_ranks = _count_redundancy_list(rank_num, param_name, redundancy_dict, device_num)
+        open_file_id = None
+        if single_param_dict.get(param_name) is None:
+            continue
+        for real_rank in single_param_dict[param_name]:
+            for redundancy_rank in redundancy_ranks:
+                if real_rank % device_num == redundancy_rank % device_num:
+                    open_file_id = real_rank
+                    break
+        if open_file_id is not None:
+            output = file_dict[open_file_id].get_tensor(param_name)
+            saftensor_dict[param_name] = output
+        else:
+            raise ValueError(f"For _transform_safetensors_single, {param_name} should be in "
+                             f"{redundancy_ranks}, but in {single_param_dict[param_name]}.")
+
+
 def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map, src_stage_device_num,
                                   dst_stage_device_num,
                                   src_strategy_dict, dst_strategy_dict, origin_src_strategy_list,
                                   origin_dst_strategy_list,
                                   ckpt_prefix, dst_safetensors_dir, output_format,
-                                  _transform_param_list, pipe_param_list=None, file_index=None, unified_flag=False):
+                                  _transform_param_list, pipe_param_list=None, file_index=None, unified_flag=False,
+                                  src_strategy_file=None):
     """
     Transforms safetensors files to a specified format without using parallel processing.
     """
+    if src_strategy_file is not None:
+        from mindspore.train._utils import get_parameter_redundancy
+        redundancy_dict_tmp = get_parameter_redundancy(src_strategy_file)
+        redundancy_dict = {}
+        device_num = 0
+        for param_name, redundancy in redundancy_dict_tmp.items():
+            if device_num == 0:
+                device_num = max(max(redundancy)) + 1
+            origin_param_name = param_name
+            pipeline_stage = 0
+            if "-" in param_name:
+                pipeline_stage, origin_param_name = param_name.split("-")
+                pipeline_stage = int(pipeline_stage)
+            redundancy_new = tuple(
+                (tuple(x + pipeline_stage * device_num for x in subtuple)) for subtuple in redundancy)
+            redundancy_dict[origin_param_name] = redundancy_new
+        file_dict = {}
+        single_param_dict = {}
+        for file_id, _ in all_safetensor_files_map.items():
+            f = safe_open(all_safetensor_files_map.get(file_id), framework="np")
+            file_dict[file_id] = f
+            for param_name in f.keys():
+                if param_name not in single_param_dict.keys():
+                    single_param_dict[param_name] = {file_id}
+                else:
+                    single_param_dict[param_name].add(file_id)
     src_strategy_list_keys = _convert_to_list(src_strategy_dict).keys() if src_strategy_dict else []
     dst_strategy_list_keys = _convert_to_list(dst_strategy_dict).keys() if dst_strategy_dict else []
     for needed_rank_list_key, transform_rank_list in needed_rank_list_map.items():
@@ -494,19 +551,23 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
         for needed_rank in needed_rank_list:
             if pipe_param_list:
                 saftensor_dict = dict()
-                with safe_open(all_safetensor_files_map.get(int(needed_rank)), framework="np") as f:
-                    if not unified_flag:
-                        all_param_name_set = set(f.keys())
-                        src_param_name_set = set(src_strategy_list_keys)
-                        dst_param_name_set = set(dst_strategy_list_keys)
-                        hyper_param_set = all_param_name_set - (src_param_name_set & dst_param_name_set)
-                        pipe_param_list.extend(list(hyper_param_set))
-                    for param_name in pipe_param_list:
-                        if param_name not in f.keys():
-                            # param not in ckpt file, check reason
-                            continue
-                        output = f.get_tensor(param_name)
-                        saftensor_dict[param_name] = output
+                if src_strategy_file is not None:
+                    _find_remove_redundancy_rank_id(pipe_param_list, single_param_dict, file_dict, saftensor_dict,
+                                                    redundancy_dict, needed_rank, device_num)
+                else:
+                    with safe_open(all_safetensor_files_map.get(int(needed_rank)), framework="np") as f:
+                        if not unified_flag:
+                            all_param_name_set = set(f.keys())
+                            src_param_name_set = set(src_strategy_list_keys)
+                            dst_param_name_set = set(dst_strategy_list_keys)
+                            hyper_param_set = all_param_name_set - (src_param_name_set & dst_param_name_set)
+                            pipe_param_list.extend(list(hyper_param_set))
+                        for param_name in pipe_param_list:
+                            if param_name not in f.keys():
+                                # param not in ckpt file, check reason
+                                continue
+                            output = f.get_tensor(param_name)
+                            saftensor_dict[param_name] = output
             else:
                 saftensor_dict = load_file(all_safetensor_files_map.get(int(needed_rank)))
             for param_name, param in saftensor_dict.items():
@@ -527,7 +588,7 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
             local_rank_id = transform_rank % dst_stage_device_num
             transform_param_dict = _transform_parallel_safetensor(local_rank_id, param_total_dict,
                                                                   param_attr_dict, src_strategy_list, dst_strategy_list,
-                                                                  param_total_dict_keys)
+                                                                  param_total_dict_keys, src_strategy_file)
             if file_index is not None:
                 save_safetensor_file = f"part{file_index}.{output_format}"
                 save_safetensor_file_dir = dst_safetensors_dir
@@ -674,7 +735,7 @@ def transform_safetensors_by_rank(rank_id, safetensor_files_map, save_safetensor
     save_file(transform_param_dict, save_safetensor_file_name)
 
 
-def _collect_safetensor_files(src_safetensors_dir, format='safetensors'):
+def _collect_safetensor_files(src_safetensors_dir, format='safetensors', file_suffix=None):
     """
     Collects all safetensors files from the specified directory and its subdirectories.
     """
@@ -692,7 +753,10 @@ def _collect_safetensor_files(src_safetensors_dir, format='safetensors'):
                            format(safetensor_dir))
             continue
         rank_id = int(rank_id_str)
-        safetensor_file_name = os.path.join(safetensor_dir, f"*.{format}")
+        if file_suffix is None:
+            safetensor_file_name = os.path.join(safetensor_dir, f"*.{format}")
+        else:
+            safetensor_file_name = os.path.join(safetensor_dir, f"*{file_suffix}.{format}")
         rank_ckpts = glob.glob(safetensor_file_name)
         rank_ckpts.sort()
         for safetensor_file in rank_ckpts:
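
Note: file_suffix only narrows the per-rank glob pattern. A small illustration of the difference, using a placeholder directory layout:

    import glob
    import os

    rank_dir = "./output/rank_0"  # placeholder path

    # file_suffix=None: collect every safetensors file in the rank directory.
    all_files = sorted(glob.glob(os.path.join(rank_dir, "*.safetensors")))

    # file_suffix="-step100": collect only files whose names end with that suffix,
    # e.g. "net-step100.safetensors" but not "net-step200.safetensors".
    step_files = sorted(glob.glob(os.path.join(rank_dir, "*-step100.safetensors")))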
@@ -727,7 +791,7 @@ def load_file_by_param_name(filename, parme_name_list):
 
 
 def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, src_strategy_list,
-                                   dst_strategy_list, param_total_dict_keys=None):
+                                   dst_strategy_list, param_total_dict_keys=None, src_strategy_file=None):
     """
     Transform model parallel dimension for distributed safetensor files.
     """
@@ -779,7 +843,7 @@ def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, s
 
         # when the from_layout is less devices, the safetensor_map for map[device_num] should using map[0]
         device_list = list(range(0, np.prod(from_tensor_layout[0])))
-        if rank_id % device_num not in param_attr_dict[param_name]:
+        if rank_id % device_num not in param_attr_dict[param_name] and src_strategy_file is None:
             raise ValueError("The safetensor of rank {} is missing.".format(rank_id % device_num))
         param_rank_map = _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout,
                                                                             device_list, rank_id)
@@ -801,7 +865,7 @@ def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, s
     return transform_param_dict
 
 
-def unified_safetensors(src_dir, src_strategy_file, dst_dir):
+def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundancy=True, file_suffix=None):
     """
     Merge multiple safetensor files into a unified safetensor file.
 
@@ -809,6 +873,10 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir):
         src_dir (str): Source weight saving directory.
         src_strategy_file (str): Source weight segmentation strategy file.
         dst_dir (str): Target save directory.
+        merge_with_redundancy (bool, optional): Whether the merged source weight files are de-duplicated and
+            saved safetensors files. Default: ``True``, indicating that the merged source weight files are complete.
+        file_suffix (str, optional): Specify the filename suffix for merging safetensors files. Default: ``None``,
+            meaning all safetensors files in the source weight directory will be merged.
 
     Raises:
         ValueError: If the safetensors file of rank is missing.
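
Note: a hedged usage sketch of the extended signature; the paths and suffix below are placeholders, and the import path follows this module (the function may also be re-exported at the package level):

    from mindspore.parallel.transform_safetensors import unified_safetensors

    # merge_with_redundancy=False signals that the per-rank files were saved de-duplicated,
    # so the merge falls back to the strategy file's redundancy groups to locate each
    # parameter; file_suffix restricts the merge to files ending with the given suffix.
    unified_safetensors(
        src_dir="./output/checkpoints",                    # placeholder
        src_strategy_file="./strategy/src_strategy.ckpt",  # placeholder
        dst_dir="./output/unified",
        merge_with_redundancy=False,
        file_suffix="-step100",                            # placeholder
    )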
@@ -827,8 +895,8 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir):
     _make_dir(dst_dir, "path")
     if os.path.isfile(src_dir):
         raise ValueError("For 'unified_safetensors', the 'src_dir' can not be a file.")
-    all_safetensor_files_map = _collect_safetensor_files(src_dir)
-    all_ckpt_files_map = _collect_safetensor_files(src_dir, format='ckpt')
+    all_safetensor_files_map = _collect_safetensor_files(src_dir, format="safetensors", file_suffix=file_suffix)
+    all_ckpt_files_map = _collect_safetensor_files(src_dir, format="ckpt", file_suffix=file_suffix)
     if all_safetensor_files_map and all_ckpt_files_map:
         raise ValueError("For 'unified_safetensors', the 'src_dir' cannot contain "
                          "both ckpt file and safetensors file simultaneously")
@@ -847,14 +915,21 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir):
     layout_map = _convert_to_list(src_strategy_dict)
 
     total_size = 0
+    actual_params = set()
     for _, file_name in all_safetensor_files_map.items():
         total_size += os.path.getsize(file_name) / 1024 / 1024 / 1024
+        with safe_open(file_name, framework="np") as f:
+            actual_params.update(f.keys())
     split_num = math.ceil(total_size / 3)
+    params_to_store = actual_params & set(layout_map.keys())
 
-    name_list = list(layout_map.keys())
+    name_list = []
+    for name in list(params_to_store):
+        if name.startswith("accu_grads"):
+            continue
+        name_list.append(name)
     split_list = _split_list(name_list, split_num)
 
-    all_safetensor_files_map = _collect_safetensor_files(src_dir)
     with safe_open(all_safetensor_files_map.get(0), framework="np") as f:
         all_key = f.keys()
         hyper_parameter = set(all_key) - set(name_list)
@@ -878,12 +953,14 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir):
     res = [i for i in range(split_num)]
     res = _split_list(res, max_process)
     processes = []
-
+    src_strategy_name = None
+    if not merge_with_redundancy:
+        src_strategy_name = src_strategy_file
     for i in range(max_process):
         p = mp.Process(target=_transform_safetensors_single_semaphore, args=(
             needed_rank_list_map, all_safetensor_files_map, src_stage_device_num, dst_stage_device_num,
             src_strategy_dict, None, origin_src_strategy_list, origin_dst_strategy_list,
-            "", dst_dir, "safetensors", None, split_list, res[i], True))
+            "", dst_dir, "safetensors", None, split_list, res[i], True, src_strategy_name))
         p.start()
         processes.append(p)
     for p in processes:
897
974
  origin_dst_strategy_list,
898
975
  ckpt_prefix, dst_safetensors_dir, output_format,
899
976
  _transform_param_list, pipe_param_list=None, file_index=None,
900
- unified_flag=False):
977
+ unified_flag=False, src_strategy_file=None):
901
978
  for i in file_index:
902
979
  _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map, src_stage_device_num,
903
980
  dst_stage_device_num, src_strategy_dict, dst_strategy_dict,
904
981
  origin_src_strategy_list,
905
982
  origin_dst_strategy_list, ckpt_prefix, dst_safetensors_dir, output_format,
906
- _transform_param_list, pipe_param_list[i], i, unified_flag)
983
+ _transform_param_list, pipe_param_list[i], i, unified_flag, src_strategy_file)
907
984
 
908
985
 
909
986
  def _split_list(split_list, split_num):
@@ -911,6 +988,45 @@ def _split_list(split_list, split_num):
     return [array.tolist() for array in split_array]
 
 
+def _apply_sf_obj_transform_operators(transform_operator_stack, sf_obj, device_num):
+    """apply safetensors object operators"""
+    if not transform_operator_stack:
+        return sf_obj[:]
+    level = transform_operator_stack[-1][1]
+    level_operators = []
+    while True:
+        if not transform_operator_stack or (level != transform_operator_stack[-1][1]):
+            tmp_tensor_dict = {}
+            if not level_operators:
+                continue
+            op_name = level_operators[0][2][0]
+            for operator_pair in level_operators:
+                rank_id = operator_pair[0]
+                cur_level = operator_pair[1]
+                operator = operator_pair[2]
+                if operator[0] != op_name:
+                    raise ValueError("The operator in the same level should be equal in the transform tensor operator "
+                                     "list, but the find {} and {} in level {}".format(op_name, operator[0], cur_level))
+                if operator[0] != "AllConcat":
+                    sf_obj = _apply_operator(operator[0])(sf_obj, operator)
+                    continue
+                for rank in operator[1][:-1]:
+                    if rank % device_num not in sf_obj:
+                        raise ValueError("The checkpoint file of rank {} is missing.".format(rank % device_num))
+                allgather_list = [sf_obj for _ in operator[1][:-1]]
+                tmp_tensor_dict[rank_id % device_num] = _apply_operator(operator[0])(allgather_list, operator)
+            if op_name == "AllConcat":
+                for rank, value in tmp_tensor_dict.items():
+                    sf_obj = value
+            level_operators.clear()
+        if not transform_operator_stack:
+            break
+        operator_pair = transform_operator_stack.pop()
+        level = operator_pair[1]
+        level_operators.append(operator_pair)
+    return sf_obj
+
+
 def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None, dst_safetensors_dir=None,
                               rank_id=None):
     """load parallel safetensors by merged file."""
@@ -930,7 +1046,9 @@ def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None
     param_list = param_name_map.keys()
 
     total_param = dict()
-
+    dst_stage_device_num = np.prod(dst_strategy_list.get(list(dst_strategy_list.keys())[0])[0]) if dst_strategy_list \
+        is not None else 1
+    local_rank_id = rank_id % dst_stage_device_num
     for param_name in param_list:
         if param_name not in param_name_map:
             continue
@@ -939,19 +1057,54 @@ def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None
             if param_name not in f.keys():
                 continue
             sf_obj = f.get_slice(param_name)
-            param_dict = dict()
-            param_dict[param_name] = sf_obj
 
+            tensor_shape = sf_obj.get_shape()
+            from_dev_matrix = [1]
+            from_tensor_map = [-1] * len(tensor_shape)
+            from_opt_shard_step = 0
+            from_opt_shard_size = 0
             if dst_strategy_list is not None:
                 if param_name not in dst_strategy_list:
                     continue
-                slice_op, shape = _get_slice(rank_id, sf_obj, param_name, dst_strategy_list)
+                to_dev_matrix_origin, to_tensor_map_origin, to_opt_shard_step, to_opt_shard_size = _extract_layout_item(
+                    dst_strategy_list.get(param_name))
+
+                device_num = np.prod(from_dev_matrix)
+                param_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+                origin_tensor_shape = ()
+                for i, item in enumerate(tensor_shape):
+                    if i == 0 and from_opt_shard_size > 0:
+                        origin_tensor_shape += (item * param_strategy[i] * from_opt_shard_size,)
+                        continue
+                    origin_tensor_shape += (item * param_strategy[i],)
+
+                from_dev_matrix, from_tensor_map, from_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
+                    from_dev_matrix, from_tensor_map, from_opt_shard_step, from_opt_shard_size, origin_tensor_shape)
+                to_dev_matrix, to_tensor_map, to_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
+                    to_dev_matrix_origin, to_tensor_map_origin, to_opt_shard_step, to_opt_shard_size, origin_tensor_shape)
+                # Convert tensor layout to same device num
+                from_tensor_layout, to_tensor_layout = _construct_from_to_tensor_layout(from_full_tensor_shape,
+                                                                                        from_dev_matrix,
+                                                                                        from_tensor_map,
+                                                                                        to_full_tensor_shape,
+                                                                                        to_dev_matrix, to_tensor_map)
+
+                # when the from_layout is less devices, the safetensor_map for map[device_num] should using map[0]
+                device_list = list(range(0, np.prod(from_tensor_layout[0])))
+                param_rank_map = _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout,
                                                                                     device_list, local_rank_id)
+
+                from_info_tuple = (from_opt_shard_size, from_dev_matrix, from_tensor_map, from_full_tensor_shape)
+                to_info_tuple = (to_opt_shard_size, to_dev_matrix_origin, to_tensor_map_origin, origin_tensor_shape)
+                _insert_opt_shard_reshape(param_rank_map, from_info_tuple, to_info_tuple)
+                transform_operator_stack = _generate_transform_operator_stack(param_rank_map, local_rank_id)
+
+                slice_param = _apply_sf_obj_transform_operators(transform_operator_stack, sf_obj, device_num)
             else:
-                slice_op, shape = slice(None, None, None), None
-            slice_param = sf_obj[slice_op]
-            if shape is not None:
-                slice_param = slice_param.reshape(shape)
+                slice_param = sf_obj[:]
+
             total_param[param_name] = ms.Parameter(slice_param)
+
     if 'hyper_param.safetensors' in file_list:
         hyper_parameter_file_name = os.path.join(total_safetensors_dir, "hyper_param.safetensors")
         with safe_open(hyper_parameter_file_name, framework="np") as f:
mindspore/profiler/profiling.py

@@ -31,7 +31,7 @@ from mindspore.context import get_auto_parallel_context
 from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
 import mindspore._c_expression as c_expression
 import mindspore._c_dataengine as cde
-from mindspore._c_expression import _framework_profiler_enable_mi
+from mindspore._c_expression import _framework_profiler_enable_mi, _framework_profiler_disable_mi
 from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
     ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
 from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
@@ -824,6 +824,10 @@ class Profiler:
             self._ascend_profiler.stop()
 
         self._stop_time = int(time.time() * 10000000)
+
+        if self._profile_framework:
+            _framework_profiler_disable_mi()
+
         ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         self._init_profiler_info()
         ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
mindspore/run_check/_check_version.py

@@ -266,8 +266,10 @@ class AscendEnvChecker(EnvChecker):
         self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
         self.ascend_opp_path = os.getenv("ASCEND_OPP_PATH")
         self.ascend_aicpu_path = os.getenv("ASCEND_AICPU_PATH")
-        self.compiler_version = self.ascend_opp_path.split("opp")[0] + "compiler/version.info"
-
+        if not self.ascend_opp_path is None:
+            self.compiler_version = self.ascend_opp_path.split("opp")[0] + "compiler/version.info"
+        else:
+            self.compiler_version = ""
         # check content
         self.path_check = "/compiler/ccec_compiler/bin"
         self.python_path_check = "opp/built-in/op_impl/ai_core/tbe"