mindspore 2.4.0__cp39-none-any.whl → 2.4.1__cp39-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/common/initializer.py +51 -15
- mindspore/common/parameter.py +18 -4
- mindspore/common/tensor.py +15 -49
- mindspore/communication/comm_func.py +7 -7
- mindspore/context.py +9 -0
- mindspore/include/mindapi/base/format.h +13 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_ops.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_compiler/setup.py +1 -1
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +1 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h +6 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/rms_norm_op.h +4 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layer_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_old_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_old_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_old_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_bf16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_bf16_fp32.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_bf16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/matmul_add_rmsnorm/matmul_add_rmsnorm_fp16_fp32.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bnsd_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mint/__init__.py +490 -2
- mindspore/mint/nn/__init__.py +2 -2
- mindspore/mint/optim/adamw.py +6 -14
- mindspore/nn/cell.py +1 -3
- mindspore/nn/layer/basic.py +24 -7
- mindspore/nn/layer/embedding.py +31 -14
- mindspore/nn/optim/tft_wrapper.py +12 -15
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
- mindspore/ops/_grad_experimental/grad_comm_ops.py +20 -1
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +6 -0
- mindspore/ops/auto_generate/gen_extend_func.py +33 -0
- mindspore/ops/auto_generate/gen_ops_def.py +52 -3
- mindspore/ops/auto_generate/gen_ops_prim.py +155 -6
- mindspore/ops/function/array_func.py +2 -0
- mindspore/ops/function/math_func.py +7 -1
- mindspore/ops/function/random_func.py +221 -7
- mindspore/ops/operations/__init__.py +1 -1
- mindspore/ops/operations/array_ops.py +3 -1
- mindspore/ops/operations/comm_ops.py +21 -0
- mindspore/ops/operations/manually_defined/ops_def.py +8 -10
- mindspore/parallel/_auto_parallel_context.py +3 -1
- mindspore/parallel/_cell_wrapper.py +2 -0
- mindspore/parallel/_tensor.py +46 -2
- mindspore/parallel/_utils.py +40 -21
- mindspore/parallel/transform_safetensors.py +196 -43
- mindspore/profiler/profiling.py +5 -1
- mindspore/run_check/_check_version.py +4 -2
- mindspore/train/_utils.py +92 -32
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_on_request_exit.py +12 -1
- mindspore/train/callback/_tft_register.py +27 -4
- mindspore/train/dataset_helper.py +10 -2
- mindspore/train/model.py +20 -0
- mindspore/train/serialization.py +8 -18
- mindspore/version.py +1 -1
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +8 -6
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +109 -109
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +0 -0
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/parallel/_tensor.py
CHANGED

@@ -590,6 +590,8 @@ def _apply_operator(operator_name):
         Returns:
             The data of tensor after apply operator.
         """
+        if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
+            numpy_data = numpy_data[:]
         if not isinstance(numpy_data, np.ndarray):
             raise TypeError("The data should be a numpy.ndarray.")
         _check_operator(reshape_op)
@@ -629,8 +631,6 @@ def _apply_operator(operator_name):
         Returns:
             The data of tensor after apply operator.
         """
-        if not isinstance(numpy_data, np.ndarray):
-            raise TypeError("The data should be a numpy.ndarray.")
         _check_operator(slice_op)
         if len(slice_op[1]) % 3 != 0:
             raise ValueError("The slice operator information is wrong.")
@@ -701,6 +701,50 @@ def _load_tensor_shape(dev_mat, tensor_map, full_shape=None, rank_id=-1):
     return tuple(res)


+def _count_tensor_shape(dev_mat, tensor_map, full_shape=None, rank_id=-1):
+    """get tensor shape"""
+    if rank_id == -1:
+        rank = get_rank()
+    else:
+        rank = rank_id
+    tensor_strategy = _get_tensor_strategy(dev_mat, tensor_map)
+    tensor_slice_index = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank)
+    np_tensor_list = _chunk_shape_by_strategy(full_shape, tensor_strategy)
+    np_tensor_slice_index = np_tensor_list[int(tensor_slice_index)]
+    res = []
+    for index in np_tensor_slice_index:
+        res.append(index[1] - index[0])
+    return res
+
+
+def _load_tensor_shape_by_layout(tensor, layout, rank_id):
+    """get tensor shape by layout"""
+    if not isinstance(layout, tuple):
+        raise TypeError("The layout should be tuple! layout is {}".format(layout))
+    if len(layout) < 7:
+        raise ValueError("The length of layout must be larger than 6! layout is {}".format(layout))
+    slice_shape = layout[2]
+    if slice_shape:
+        return slice_shape
+    tensor_map = layout[1]
+    if not tensor_map:
+        return tensor.shape
+    dev_mat = layout[0]
+    uniform_split = layout[4]
+    group = layout[5]
+    full_shape = layout[6]
+    if not full_shape:
+        full_shape = tensor.shape
+    if uniform_split == 0:
+        raise RuntimeError("The load tensor only support uniform split now")
+    tensor_slice_shape = _count_tensor_shape(dev_mat, tensor_map, full_shape, rank_id)
+    if group:
+        # get a totally shard tensor slice for parallel optimizer
+        size = get_group_size(group)
+        tensor_slice_shape[0] //= size
+    return tensor_slice_shape
+
+
 def _chunk_shape_by_strategy(full_shape, strategy):
     """chunk shape by strategy"""
     shape = []
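The first hunk above lets the reshape path of `_apply_operator` accept a lazily loaded safetensors slice (`PySafeSlice`) by materializing it with `[:]` before the `numpy.ndarray` check. A minimal sketch of how such slice objects behave, using only the `safetensors` package; the file name and tensor name below are hypothetical:

    import numpy as np
    from safetensors import safe_open

    # "rank0.safetensors" and "embedding.weight" are placeholder names.
    with safe_open("rank0.safetensors", framework="np") as f:
        lazy = f.get_slice("embedding.weight")   # PySafeSlice: no data read yet
        print(lazy.get_shape())                  # shape is known without loading
        head = lazy[0:128]                       # reads only the first 128 rows
        full = lazy[:]                           # materializes a numpy.ndarray
        assert isinstance(full, np.ndarray)

This `[:]` materialization is exactly what the hunk inserts before `_check_operator(reshape_op)` runs.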
mindspore/parallel/_utils.py
CHANGED

@@ -14,6 +14,7 @@
 # ============================================================================
 """Utils of auto parallel"""
 import os
+from time import perf_counter
 from importlib import import_module
 import numpy as np
 import mindspore as ms
@@ -27,7 +28,7 @@ from mindspore.communication._comm_helper import _is_initialized
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.common.seed import get_seed
 from mindspore._c_expression import GraphExecutor_
-from mindspore.parallel._tensor import _load_tensor_by_layout
+from mindspore.parallel._tensor import _load_tensor_by_layout, _load_tensor_shape_by_layout
 
 SUPPORTED_TUPLE_IN_TUPLE_STRATEGY = ["GroupedMatmul", "FusedInferAttentionScore", "Custom"]
 
@@ -104,31 +105,49 @@ def _need_to_full():
     return not _get_full_batch()


+class ParallelParamInitProfCtx:
+    """Collect parallel param initialization performance context mgr."""
+
+    def __init__(self, parameter, func_name):
+        self.parameter = parameter
+        self.func_name = func_name
+        self.start_timestamp = None
+
+    def __enter__(self):
+        self.start_timestamp = perf_counter()
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        end_timestamp = perf_counter()
+        duration = end_timestamp - self.start_timestamp
+        if os.getenv("MS_DEV_PARAM_INIT_PROF_COLLECT"):
+            logger.warning(f"{self.func_name}: {self.parameter.name}, shape: {self.parameter.shape}, "
+                           f"sliced: {self.parameter.sliced}, duration: {duration}")
+
+
 def _slice_parameter(parameter, phase, layout):
     """Slice python parameter obj according to the layout."""
-
-
-
-
-
+    # graph_executor.updata_param_node_default_input(phase, {parameter.name: parameter})
+    if getattr(parameter, "init_param", False):
+        if layout is None:
+            parameter.sliced = True
+            return
+        if not parameter.sliced:
+            rank = get_rank()
+            new_tensor_shape = _load_tensor_shape_by_layout(parameter, layout, rank)
+            parameter.shape = new_tensor_shape
+    else:
+        graph_executor = GraphExecutor_.get_instance()
+        new_param = parameter.init_data(layout, set_sliced=True)
+        parameter = new_param
+        graph_executor.updata_param_node_default_input(phase, {parameter.name: parameter})
+        if layout is None:
+            parameter.sliced = True
+            return
+        if not parameter.sliced:
             rank = get_rank()
             new_tensor = _load_tensor_by_layout(parameter, layout, rank)
             parameter.set_data(new_tensor, True)
-            return
-        layout_shape = layout[2]
-        parameter.shape = tuple(layout_shape)
-        return
-    graph_executor = GraphExecutor_.get_instance()
-    new_param = parameter.init_data(layout, set_sliced=True)
-    parameter = new_param
-    graph_executor.updata_param_node_default_input(phase, {parameter.name: parameter})
-    if layout is None:
-        parameter.sliced = True
-        return
-    if not parameter.sliced:
-        rank = get_rank()
-        new_tensor = _load_tensor_by_layout(parameter, layout, rank)
-        parameter.set_data(new_tensor, True)


 def _slice_tensor(tensor, layout, rank_id):
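The new `ParallelParamInitProfCtx` context manager above times each parameter-slicing step with `perf_counter` and only emits a log line when the opt-in `MS_DEV_PARAM_INIT_PROF_COLLECT` environment variable is set. A standalone sketch of the same pattern; the class and label names here are illustrative, not MindSpore API:

    import os
    from time import perf_counter

    class TimedScope:
        """Time a block and report only when an opt-in env var is present."""

        def __init__(self, label):
            self.label = label
            self.start = None

        def __enter__(self):
            self.start = perf_counter()
            return self

        def __exit__(self, exc_type, exc_value, exc_traceback):
            duration = perf_counter() - self.start
            if os.getenv("MS_DEV_PARAM_INIT_PROF_COLLECT"):
                print(f"{self.label}: duration: {duration}")

    # Usage: wrap the work to be measured, e.g. a parameter initialization.
    with TimedScope("init weights"):
        sum(range(1_000_000))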
mindspore/parallel/transform_safetensors.py
CHANGED

@@ -32,7 +32,7 @@ from mindspore.parallel._parallel_serialization import _get_device_num_from_stra
 from mindspore.parallel._tensor import _get_tensor_strategy, _construct_from_to_tensor_layout, \
     _get_needed_rank_transform_operator_map_by_layouts, \
     _generate_transform_operator_stack, _apply_tensor_transform_operators, _construct_tensor_layout_for_opt_shard, \
-    _extract_layout_item, _load_tensor_shape
+    _extract_layout_item, _load_tensor_shape, _apply_operator
 from mindspore.parallel._parallel_serialization import _build_searched_strategy, _load_protobuf_strategy, \
     _convert_to_list

@@ -375,12 +375,10 @@ def _transform_stage_safetensors(src_strategy_dict, dst_strategy_dict, ckpt_pref
         if int(needed_rank) not in all_safetensor_files_map:
             raise ValueError("The safetensor file of rank{} is needed for converting rank{}'s safetensor, "
                              "but it is missing.".format(needed_rank, rank))
-
+    dst_stage_num = _extract_pipeline_stage_num(dst_strategy_dict)
+    if not (len(needed_rank_list_map) == 1 and dst_stage_num > 1) and process_num > len(needed_rank_list_map):
         ms.log.warning("The value of process_num cannot be greater than that of needed_rank_list_map.")
         process_num = len(needed_rank_list_map)
-    dst_stage_num = _extract_pipeline_stage_num(dst_strategy_dict)
-    if len(needed_rank_list_map) == 1 and dst_stage_num > 1:
-        process_num = dst_stage_num
     _transform_safetensors_with_parallel(needed_rank_list_map, all_safetensor_files_map, src_stage_device_num,
                                          dst_stage_device_num, src_strategy_dict, dst_strategy_dict,
                                          origin_src_strategy_list, origin_dst_strategy_list, ckpt_prefix,
@@ -452,18 +450,18 @@ def _transform_safetensors_with_parallel(needed_rank_list_map, all_safetensor_fi
     """
     Transforms safetensors files to a specified format using parallel processing.
     """
-    part_list_dict = _distribute_files_by_size(all_safetensor_files_map, needed_rank_list_map, process_num)
-
     # cal param name for every pipeline, save in pipe_param_list.
     pipe_num = _extract_pipeline_stage_num(dst_strategy_dict)
     pipe_param_list = [None for _ in range(max(pipe_num, process_num))]
     if len(needed_rank_list_map) == 1 and pipe_num > 1:
+        process_num = pipe_num
         pipe_param_list = [[] for _ in range(pipe_num)]
         layout_map = _convert_to_list(dst_strategy_dict)

         for name, layout in layout_map.items():
             pipe_param_list[layout[6][0]].append(name)

+    part_list_dict = _distribute_files_by_size(all_safetensor_files_map, needed_rank_list_map, process_num)
     processes = []
     for i in range(process_num):
         p = mp.Process(target=_transform_safetensors_single, args=(
@@ -476,15 +474,74 @@ def _transform_safetensors_with_parallel(needed_rank_list_map, all_safetensor_fi
         p.join()


+def _count_redundancy_list(rank_num, param_name, redundancy_dict, device_num):
+    """Obtain the specified redundant group."""
+    redundancy_tuple = redundancy_dict.get(param_name)
+    for rank_list in redundancy_tuple:
+        for rank in rank_list:
+            if rank_num % device_num == rank % device_num:
+                return set(rank_list)
+    return set()
+
+
+def _find_remove_redundancy_rank_id(pipe_param_list, single_param_dict, file_dict, saftensor_dict, redundancy_dict,
+                                    needed_rank, device_num):
+    """Find the rank_id under redundant groups."""
+    for param_name in pipe_param_list:
+        rank_num = int(needed_rank)
+        redundancy_ranks = _count_redundancy_list(rank_num, param_name, redundancy_dict, device_num)
+        open_file_id = None
+        if single_param_dict.get(param_name) is None:
+            continue
+        for real_rank in single_param_dict[param_name]:
+            for redundancy_rank in redundancy_ranks:
+                if real_rank % device_num == redundancy_rank % device_num:
+                    open_file_id = real_rank
+                    break
+        if open_file_id is not None:
+            output = file_dict[open_file_id].get_tensor(param_name)
+            saftensor_dict[param_name] = output
+        else:
+            raise ValueError(f"For _transform_safetensors_single, {param_name} should be in "
+                             f"{redundancy_ranks}, but in {single_param_dict[param_name]}.")
+
+
 def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map, src_stage_device_num,
                                   dst_stage_device_num,
                                   src_strategy_dict, dst_strategy_dict, origin_src_strategy_list,
                                   origin_dst_strategy_list,
                                   ckpt_prefix, dst_safetensors_dir, output_format,
-                                  _transform_param_list, pipe_param_list=None, file_index=None, unified_flag=False
+                                  _transform_param_list, pipe_param_list=None, file_index=None, unified_flag=False,
+                                  src_strategy_file=None):
     """
     Transforms safetensors files to a specified format without using parallel processing.
     """
+    if src_strategy_file is not None:
+        from mindspore.train._utils import get_parameter_redundancy
+        redundancy_dict_tmp = get_parameter_redundancy(src_strategy_file)
+        redundancy_dict = {}
+        device_num = 0
+        for param_name, redundancy in redundancy_dict_tmp.items():
+            if device_num == 0:
+                device_num = max(max(redundancy)) + 1
+            origin_param_name = param_name
+            pipeline_stage = 0
+            if "-" in param_name:
+                pipeline_stage, origin_param_name = param_name.split("-")
+                pipeline_stage = int(pipeline_stage)
+            redundancy_new = tuple(
+                (tuple(x + pipeline_stage * device_num for x in subtuple)) for subtuple in redundancy)
+            redundancy_dict[origin_param_name] = redundancy_new
+        file_dict = {}
+        single_param_dict = {}
+        for file_id, _ in all_safetensor_files_map.items():
+            f = safe_open(all_safetensor_files_map.get(file_id), framework="np")
+            file_dict[file_id] = f
+            for param_name in f.keys():
+                if param_name not in single_param_dict.keys():
+                    single_param_dict[param_name] = {file_id}
+                else:
+                    single_param_dict[param_name].add(file_id)
     src_strategy_list_keys = _convert_to_list(src_strategy_dict).keys() if src_strategy_dict else []
     dst_strategy_list_keys = _convert_to_list(dst_strategy_dict).keys() if dst_strategy_dict else []
     for needed_rank_list_key, transform_rank_list in needed_rank_list_map.items():
@@ -494,19 +551,23 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
         for needed_rank in needed_rank_list:
             if pipe_param_list:
                 saftensor_dict = dict()
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if src_strategy_file is not None:
+                    _find_remove_redundancy_rank_id(pipe_param_list, single_param_dict, file_dict, saftensor_dict,
+                                                    redundancy_dict, needed_rank, device_num)
+                else:
+                    with safe_open(all_safetensor_files_map.get(int(needed_rank)), framework="np") as f:
+                        if not unified_flag:
+                            all_param_name_set = set(f.keys())
+                            src_param_name_set = set(src_strategy_list_keys)
+                            dst_param_name_set = set(dst_strategy_list_keys)
+                            hyper_param_set = all_param_name_set - (src_param_name_set & dst_param_name_set)
+                            pipe_param_list.extend(list(hyper_param_set))
+                        for param_name in pipe_param_list:
+                            if param_name not in f.keys():
+                                # param not in ckpt file, check reason
+                                continue
+                            output = f.get_tensor(param_name)
+                            saftensor_dict[param_name] = output
             else:
                 saftensor_dict = load_file(all_safetensor_files_map.get(int(needed_rank)))
             for param_name, param in saftensor_dict.items():
@@ -527,7 +588,7 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
             local_rank_id = transform_rank % dst_stage_device_num
             transform_param_dict = _transform_parallel_safetensor(local_rank_id, param_total_dict,
                                                                   param_attr_dict, src_strategy_list, dst_strategy_list,
-                                                                  param_total_dict_keys)
+                                                                  param_total_dict_keys, src_strategy_file)
             if file_index is not None:
                 save_safetensor_file = f"part{file_index}.{output_format}"
                 save_safetensor_file_dir = dst_safetensors_dir
@@ -674,7 +735,7 @@ def transform_safetensors_by_rank(rank_id, safetensor_files_map, save_safetensor
     save_file(transform_param_dict, save_safetensor_file_name)


-def _collect_safetensor_files(src_safetensors_dir, format='safetensors'):
+def _collect_safetensor_files(src_safetensors_dir, format='safetensors', file_suffix=None):
     """
     Collects all safetensors files from the specified directory and its subdirectories.
     """
@@ -692,7 +753,10 @@ def _collect_safetensor_files(src_safetensors_dir, format='safetensors'):
                            format(safetensor_dir))
             continue
         rank_id = int(rank_id_str)
-
+        if file_suffix is None:
+            safetensor_file_name = os.path.join(safetensor_dir, f"*.{format}")
+        else:
+            safetensor_file_name = os.path.join(safetensor_dir, f"*{file_suffix}.{format}")
         rank_ckpts = glob.glob(safetensor_file_name)
         rank_ckpts.sort()
         for safetensor_file in rank_ckpts:
@@ -727,7 +791,7 @@ def load_file_by_param_name(filename, parme_name_list):


 def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, src_strategy_list,
-                                   dst_strategy_list, param_total_dict_keys=None):
+                                   dst_strategy_list, param_total_dict_keys=None, src_strategy_file=None):
     """
     Transform model parallel dimension for distributed safetensor files.
     """
@@ -779,7 +843,7 @@ def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, s

     # when the from_layout is less devices, the safetensor_map for map[device_num] should using map[0]
     device_list = list(range(0, np.prod(from_tensor_layout[0])))
-    if rank_id % device_num not in param_attr_dict[param_name]:
+    if rank_id % device_num not in param_attr_dict[param_name] and src_strategy_file is None:
         raise ValueError("The safetensor of rank {} is missing.".format(rank_id % device_num))
     param_rank_map = _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout,
                                                                         device_list, rank_id)
@@ -801,7 +865,7 @@ def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, s
     return transform_param_dict


-def unified_safetensors(src_dir, src_strategy_file, dst_dir):
+def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundancy=True, file_suffix=None):
     """
     Merge multiple safetensor files into a unified safetensor file.

@@ -809,6 +873,10 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir):
         src_dir (str): Source weight saving directory.
         src_strategy_file (str): Source weight segmentation strategy file.
         dst_dir (str): Target save directory.
+        merge_with_redundancy (bool, optional): Whether the merged source weight files are de-duplicated and
+            saved safetensors files. Default: ``True``, indicating that the merged source weight files are complete.
+        file_suffix (str, optional): Specify the filename suffix for merging safetensors files. Default: ``None``,
+            meaning all safetensors files in the source weight directory will be merged.

     Raises:
         ValueError: If the safetensors file of rank is missing.
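Both new keyword arguments documented above default to the previous behaviour, so existing callers are unaffected. A hedged usage sketch; the directories and strategy file below are placeholders, and the import uses the module path of this file (MindSpore also documents a top-level `mindspore.unified_safetensors`, so check your installed version):

    from mindspore.parallel.transform_safetensors import unified_safetensors

    # Merge only the "*part1.safetensors" shards; the shards were saved
    # de-duplicated, so the strategy file is used to locate each parameter.
    unified_safetensors(
        src_dir="./distributed_ckpt/",
        src_strategy_file="./strategy/src_strategy.ckpt",
        dst_dir="./merged_ckpt/",
        merge_with_redundancy=False,   # source shards were saved without redundancy
        file_suffix="part1",           # only files matching *part1.safetensors
    )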
@@ -827,8 +895,8 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir):
     _make_dir(dst_dir, "path")
     if os.path.isfile(src_dir):
         raise ValueError("For 'unified_safetensors', the 'src_dir' can not be a file.")
-    all_safetensor_files_map = _collect_safetensor_files(src_dir)
-    all_ckpt_files_map = _collect_safetensor_files(src_dir, format=
+    all_safetensor_files_map = _collect_safetensor_files(src_dir, format="safetensors", file_suffix=file_suffix)
+    all_ckpt_files_map = _collect_safetensor_files(src_dir, format="ckpt", file_suffix=file_suffix)
     if all_safetensor_files_map and all_ckpt_files_map:
         raise ValueError("For 'unified_safetensors', the 'src_dir' cannot contain "
                          "both ckpt file and safetensors file simultaneously")
@@ -847,14 +915,21 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir):
     layout_map = _convert_to_list(src_strategy_dict)

     total_size = 0
+    actual_params = set()
     for _, file_name in all_safetensor_files_map.items():
         total_size += os.path.getsize(file_name) / 1024 / 1024 / 1024
+        with safe_open(file_name, framework="np") as f:
+            actual_params.update(f.keys())
     split_num = math.ceil(total_size / 3)
+    params_to_store = actual_params & set(layout_map.keys())

-    name_list =
+    name_list = []
+    for name in list(params_to_store):
+        if name.startswith("accu_grads"):
+            continue
+        name_list.append(name)
     split_list = _split_list(name_list, split_num)

-    all_safetensor_files_map = _collect_safetensor_files(src_dir)
     with safe_open(all_safetensor_files_map.get(0), framework="np") as f:
         all_key = f.keys()
         hyper_parameter = set(all_key) - set(name_list)
@@ -878,12 +953,14 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir):
     res = [i for i in range(split_num)]
     res = _split_list(res, max_process)
     processes = []
-
+    src_strategy_name = None
+    if not merge_with_redundancy:
+        src_strategy_name = src_strategy_file
     for i in range(max_process):
         p = mp.Process(target=_transform_safetensors_single_semaphore, args=(
             needed_rank_list_map, all_safetensor_files_map, src_stage_device_num, dst_stage_device_num,
             src_strategy_dict, None, origin_src_strategy_list, origin_dst_strategy_list,
-            "", dst_dir, "safetensors", None, split_list, res[i], True))
+            "", dst_dir, "safetensors", None, split_list, res[i], True, src_strategy_name))
         p.start()
         processes.append(p)
     for p in processes:
@@ -897,13 +974,13 @@ def _transform_safetensors_single_semaphore(needed_rank_list_map, all_safetensor
                                             origin_dst_strategy_list,
                                             ckpt_prefix, dst_safetensors_dir, output_format,
                                             _transform_param_list, pipe_param_list=None, file_index=None,
-                                            unified_flag=False):
+                                            unified_flag=False, src_strategy_file=None):
     for i in file_index:
         _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map, src_stage_device_num,
                                       dst_stage_device_num, src_strategy_dict, dst_strategy_dict,
                                       origin_src_strategy_list,
                                       origin_dst_strategy_list, ckpt_prefix, dst_safetensors_dir, output_format,
-                                      _transform_param_list, pipe_param_list[i], i, unified_flag)
+                                      _transform_param_list, pipe_param_list[i], i, unified_flag, src_strategy_file)


 def _split_list(split_list, split_num):
@@ -911,6 +988,45 @@ def _split_list(split_list, split_num):
     return [array.tolist() for array in split_array]


+def _apply_sf_obj_transform_operators(transform_operator_stack, sf_obj, device_num):
+    """apply safetensors object operators"""
+    if not transform_operator_stack:
+        return sf_obj[:]
+    level = transform_operator_stack[-1][1]
+    level_operators = []
+    while True:
+        if not transform_operator_stack or (level != transform_operator_stack[-1][1]):
+            tmp_tensor_dict = {}
+            if not level_operators:
+                continue
+            op_name = level_operators[0][2][0]
+            for operator_pair in level_operators:
+                rank_id = operator_pair[0]
+                cur_level = operator_pair[1]
+                operator = operator_pair[2]
+                if operator[0] != op_name:
+                    raise ValueError("The operator in the same level should be equal in the transform tensor operator "
+                                     "list, but the find {} and {} in level {}".format(op_name, operator[0], cur_level))
+                if operator[0] != "AllConcat":
+                    sf_obj = _apply_operator(operator[0])(sf_obj, operator)
+                    continue
+                for rank in operator[1][:-1]:
+                    if rank % device_num not in sf_obj:
+                        raise ValueError("The checkpoint file of rank {} is missing.".format(rank % device_num))
+                allgather_list = [sf_obj for _ in operator[1][:-1]]
+                tmp_tensor_dict[rank_id % device_num] = _apply_operator(operator[0])(allgather_list, operator)
+            if op_name == "AllConcat":
+                for rank, value in tmp_tensor_dict.items():
+                    sf_obj = value
+            level_operators.clear()
+        if not transform_operator_stack:
+            break
+        operator_pair = transform_operator_stack.pop()
+        level = operator_pair[1]
+        level_operators.append(operator_pair)
+    return sf_obj
+
+
 def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None, dst_safetensors_dir=None,
                               rank_id=None):
     """load parallel safetensors by merged file."""
@@ -930,7 +1046,9 @@ def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None
     param_list = param_name_map.keys()

     total_param = dict()
-
+    dst_stage_device_num = np.prod(dst_strategy_list.get(list(dst_strategy_list.keys())[0])[0]) if dst_strategy_list \
+        is not None else 1
+    local_rank_id = rank_id % dst_stage_device_num
     for param_name in param_list:
         if param_name not in param_name_map:
             continue
@@ -939,19 +1057,54 @@ def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None
             if param_name not in f.keys():
                 continue
             sf_obj = f.get_slice(param_name)
-            param_dict = dict()
-            param_dict[param_name] = sf_obj

+            tensor_shape = sf_obj.get_shape()
+            from_dev_matrix = [1]
+            from_tensor_map = [-1] * len(tensor_shape)
+            from_opt_shard_step = 0
+            from_opt_shard_size = 0
             if dst_strategy_list is not None:
                 if param_name not in dst_strategy_list:
                     continue
-
+                to_dev_matrix_origin, to_tensor_map_origin, to_opt_shard_step, to_opt_shard_size = _extract_layout_item(
+                    dst_strategy_list.get(param_name))
+
+                device_num = np.prod(from_dev_matrix)
+                param_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+                origin_tensor_shape = ()
+                for i, item in enumerate(tensor_shape):
+                    if i == 0 and from_opt_shard_size > 0:
+                        origin_tensor_shape += (item * param_strategy[i] * from_opt_shard_size,)
+                        continue
+                    origin_tensor_shape += (item * param_strategy[i],)
+
+                from_dev_matrix, from_tensor_map, from_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
+                    from_dev_matrix, from_tensor_map, from_opt_shard_step, from_opt_shard_size, origin_tensor_shape)
+                to_dev_matrix, to_tensor_map, to_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
+                    to_dev_matrix_origin, to_tensor_map_origin, to_opt_shard_step, to_opt_shard_size, origin_tensor_shape)
+                # Convert tensor layout to same device num
+                from_tensor_layout, to_tensor_layout = _construct_from_to_tensor_layout(from_full_tensor_shape,
+                                                                                        from_dev_matrix,
+                                                                                        from_tensor_map,
+                                                                                        to_full_tensor_shape,
+                                                                                        to_dev_matrix, to_tensor_map)
+
+                # when the from_layout is less devices, the safetensor_map for map[device_num] should using map[0]
+                device_list = list(range(0, np.prod(from_tensor_layout[0])))
+                param_rank_map = _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout,
                                                                                     device_list, local_rank_id)
+
+                from_info_tuple = (from_opt_shard_size, from_dev_matrix, from_tensor_map, from_full_tensor_shape)
+                to_info_tuple = (to_opt_shard_size, to_dev_matrix_origin, to_tensor_map_origin, origin_tensor_shape)
+                _insert_opt_shard_reshape(param_rank_map, from_info_tuple, to_info_tuple)
+                transform_operator_stack = _generate_transform_operator_stack(param_rank_map, local_rank_id)
+
+                slice_param = _apply_sf_obj_transform_operators(transform_operator_stack, sf_obj, device_num)
             else:
-
-
-                if shape is not None:
-                    slice_param = slice_param.reshape(shape)
+                slice_param = sf_obj[:]
+
             total_param[param_name] = ms.Parameter(slice_param)
+
     if 'hyper_param.safetensors' in file_list:
         hyper_parameter_file_name = os.path.join(total_safetensors_dir, "hyper_param.safetensors")
         with safe_open(hyper_parameter_file_name, framework="np") as f:
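`_count_redundancy_list` above selects the redundancy group that a target rank belongs to by comparing ranks modulo the per-stage device count, and `_find_remove_redundancy_rank_id` then reads the parameter from whichever shard of that group is actually present. A self-contained sketch of the matching rule; the groups and device count below are made up:

    # Parameter "w1" is stored identically on ranks {0, 4} and on ranks {1, 5};
    # there are 4 devices per pipeline stage.
    redundancy_dict = {"w1": ((0, 4), (1, 5))}
    device_num = 4

    def count_redundancy_list(rank_num, param_name, redundancy_dict, device_num):
        """Same rule as _count_redundancy_list: a rank matches the group that
        contains any rank equal to it modulo device_num."""
        for rank_list in redundancy_dict.get(param_name, ()):
            for rank in rank_list:
                if rank_num % device_num == rank % device_num:
                    return set(rank_list)
        return set()

    print(count_redundancy_list(8, "w1", redundancy_dict, device_num))   # {0, 4}
    print(count_redundancy_list(5, "w1", redundancy_dict, device_num))   # {1, 5}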
mindspore/profiler/profiling.py
CHANGED

@@ -31,7 +31,7 @@ from mindspore.context import get_auto_parallel_context
 from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
 import mindspore._c_expression as c_expression
 import mindspore._c_dataengine as cde
-from mindspore._c_expression import _framework_profiler_enable_mi
+from mindspore._c_expression import _framework_profiler_enable_mi, _framework_profiler_disable_mi
 from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
     ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
 from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
@@ -824,6 +824,10 @@ class Profiler:
             self._ascend_profiler.stop()

         self._stop_time = int(time.time() * 10000000)
+
+        if self._profile_framework:
+            _framework_profiler_disable_mi()
+
         ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         self._init_profiler_info()
         ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
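The change above pairs `_framework_profiler_enable_mi` with a matching disable call when framework-level profiling was requested. A minimal usage sketch; `profile_framework` mirrors the `self._profile_framework` flag consulted in this hunk, but its accepted values should be checked against the installed 2.4 `Profiler` documentation, and `run_my_workload` is a hypothetical stand-in:

    from mindspore import Profiler

    # start_profile=False lets the profiled region be bracketed explicitly.
    profiler = Profiler(start_profile=False, output_path="./profiler_data",
                        profile_framework="time")
    profiler.start()
    run_my_workload()   # hypothetical training/inference steps
    profiler.stop()     # with the 2.4.1 fix this also disables the framework hooks
    profiler.analyse()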
mindspore/run_check/_check_version.py
CHANGED

@@ -266,8 +266,10 @@ class AscendEnvChecker(EnvChecker):
         self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
         self.ascend_opp_path = os.getenv("ASCEND_OPP_PATH")
         self.ascend_aicpu_path = os.getenv("ASCEND_AICPU_PATH")
-
-
+        if not self.ascend_opp_path is None:
+            self.compiler_version = self.ascend_opp_path.split("opp")[0] + "compiler/version.info"
+        else:
+            self.compiler_version = ""
         # check content
         self.path_check = "/compiler/ccec_compiler/bin"
         self.python_path_check = "opp/built-in/op_impl/ai_core/tbe"