mindspore 2.2.0__cp37-cp37m-manylinux1_x86_64.whl → 2.2.11__cp37-cp37m-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/_akg/akg/composite/build_module.py +104 -20
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
- mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
- mindspore/_akg/akg/utils/kernel_exec.py +41 -15
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
- mindspore/_akg/akg/utils/util.py +56 -1
- mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +3 -3
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/splitter.py +3 -2
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
- mindspore/_extends/parse/__init__.py +3 -2
- mindspore/_extends/parse/parser.py +6 -1
- mindspore/_extends/parse/standard_method.py +14 -11
- mindspore/_extends/remote/kernel_build_server.py +2 -1
- mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/_utils.py +16 -0
- mindspore/common/api.py +1 -1
- mindspore/common/auto_dynamic_shape.py +81 -85
- mindspore/common/dump.py +1 -1
- mindspore/common/tensor.py +3 -20
- mindspore/config/op_info.config +1 -1
- mindspore/context.py +11 -4
- mindspore/dataset/engine/cache_client.py +8 -5
- mindspore/dataset/engine/datasets_standard_format.py +5 -0
- mindspore/dataset/vision/transforms.py +21 -21
- mindspore/experimental/optim/adam.py +1 -1
- mindspore/gen_ops.py +1 -1
- mindspore/include/api/model.h +17 -0
- mindspore/include/api/status.h +8 -3
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/nn/cell.py +0 -3
- mindspore/nn/layer/activation.py +4 -5
- mindspore/nn/layer/conv.py +39 -23
- mindspore/nn/layer/flash_attention.py +54 -129
- mindspore/nn/layer/math.py +3 -7
- mindspore/nn/layer/rnn_cells.py +5 -5
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +12 -3
- mindspore/numpy/utils_const.py +5 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
- mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_utils/utils.py +2 -0
- mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
- mindspore/ops/function/array_func.py +10 -7
- mindspore/ops/function/grad/grad_func.py +0 -1
- mindspore/ops/function/nn_func.py +98 -9
- mindspore/ops/function/random_func.py +2 -1
- mindspore/ops/op_info_register.py +24 -21
- mindspore/ops/operations/__init__.py +6 -2
- mindspore/ops/operations/_grad_ops.py +25 -6
- mindspore/ops/operations/_inner_ops.py +155 -23
- mindspore/ops/operations/array_ops.py +9 -7
- mindspore/ops/operations/comm_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +85 -68
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +7 -6
- mindspore/ops/operations/nn_ops.py +193 -49
- mindspore/parallel/_parallel_serialization.py +10 -3
- mindspore/parallel/_tensor.py +4 -1
- mindspore/parallel/checkpoint_transform.py +13 -2
- mindspore/parallel/shard.py +17 -10
- mindspore/profiler/common/util.py +1 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
- mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
- mindspore/profiler/parser/ascend_op_generator.py +1 -1
- mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
- mindspore/profiler/parser/base_timeline_generator.py +1 -1
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
- mindspore/profiler/parser/framework_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +19 -0
- mindspore/profiler/profiling.py +46 -24
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/parsers/for_parser.py +7 -7
- mindspore/rewrite/parsers/module_parser.py +4 -4
- mindspore/rewrite/symbol_tree.py +1 -4
- mindspore/run_check/_check_version.py +5 -3
- mindspore/safeguard/rewrite_obfuscation.py +52 -28
- mindspore/scipy/ops.py +55 -5
- mindspore/scipy/optimize/__init__.py +3 -2
- mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
- mindspore/train/callback/_summary_collector.py +1 -1
- mindspore/train/dataset_helper.py +1 -0
- mindspore/train/model.py +2 -2
- mindspore/train/serialization.py +97 -11
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +23 -7
- mindspore/version.py +1 -1
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
- mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
|
@@ -82,6 +82,238 @@ def count_average(data):
|
|
|
82
82
|
class AscendHCCLGenerator:
    """Generate ascend hccl data from files.

    The generator reads every ``hccl_*.json`` file found under ``source_path``,
    attributes each complete ('X' phase) event to a model/iteration using the
    supplied step trace, and aggregates communication cost, wait cost and
    per-link transport statistics for every iteration.

    Args:
        source_path (str): Directory that is searched for ``hccl_*.json`` files.
        steptrace (numpy.ndarray): Structured array that provides at least the
            fields 'Model ID', 'Iteration ID' and 'Iteration End'.
    """

    # Rank value that is replaced by the peer rank when building the link name.
    # NOTE(review): presumably the "rank not set" marker written by the tool - confirm.
    _INVALID_RANK = 0xffffffff

    def __init__(self, source_path, steptrace):
        self.root_path = source_path
        self.steptrace = steptrace
        # One entry per iteration:
        # [iteration, communication_cost, wait_cost, link_info, communication_operator_cost]
        self.hccl_raw = []
        # Record layout (numpy structured dtype) of one parsed hccl event.
        self.hccl_data_df = np.dtype(
            [('model_id', int), ('iteration_id', int), ('name', object), ('pid', int), ('tid', int), ('ts', float),
             ('te', float), ('dur', float), ('ph', object),
             ('task_type', object), ('link_info', object), ('transport_type', object), ('size', int), ('tag', object)])

    @staticmethod
    def _cost_analyse(iteration):
        """Split the summed duration of the events into communication cost and wait cost.

        Args:
            iteration (numpy.ndarray): Events laid out as ``hccl_data_df``.

        Returns:
            tuple, (communication_cost, wait_cost): the summed 'dur' of the events whose
            name is not 'Notify_Wait' and of the events whose name is 'Notify_Wait'.
        """
        communication_cost = np.sum(iteration[iteration['name'] != 'Notify_Wait']['dur'])
        wait_cost = np.sum(iteration[iteration['name'] == 'Notify_Wait']['dur'])
        return communication_cost, wait_cost

    @staticmethod
    def _sdma_analyse(groupby_transport):
        """Summarise the SDMA events of one link.

        Only events whose task type is 'Memcpy' or 'Reduce Inline' are counted.

        Args:
            groupby_transport (numpy.ndarray): Events of a single link and transport type.

        Returns:
            list, [communication_time, communication_size, bandwidth]. Time and size are the
            raw sums scaled by 1e-3; the bandwidth is 0 when the time is 0.
        """
        sdma_rows = groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])]
        sdma_communication_time = np.sum(sdma_rows['dur']) * 1e-3
        sdma_communication_size = np.sum(sdma_rows['size']) * 1e-3
        sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
            if sdma_communication_time != 0 else 0
        return [sdma_communication_time, sdma_communication_size, sdma_bandwidth]

    @staticmethod
    def _rdma_analyse(groupby_transport):
        """Summarise the RDMA events of one link, thread by thread.

        Within every thread the events are scanned for three consecutive entries whose
        task types are 'RDMASend', 'RDMASend', 'Notify Wait'; only such triples contribute.

        Args:
            groupby_transport (numpy.ndarray): Events of a single link and transport type.

        Returns:
            list, [communication_time, communication_size, bandwidth, wait_time] summed over
            the threads when there is more than one thread. With exactly one thread the
            single record is returned wrapped in an outer list, and with no thread the list
            is empty.
        """
        # NOTE(review): the one-thread result is nested while the multi-thread result is flat.
        # This shape is kept as-is because consumers of the csv may rely on it - confirm.
        thread_information = []
        for thread_index in np.unique(groupby_transport['tid']):
            groupby_thread = groupby_transport[groupby_transport['tid'] == thread_index]
            rdma_communication_time = 0
            rdma_communication_size = 0
            rdma_communication_wait_time = 0
            start_index = 0
            # A triple needs start_index + 2 to be a valid index.
            end_index = groupby_thread.size - 2
            while start_index < end_index:
                if groupby_thread[start_index]['task_type'] == 'RDMASend':
                    second_index = start_index + 1
                    third_index = start_index + 2
                    second_task_type = groupby_thread[second_index]['task_type']
                    third_task_type = groupby_thread[third_index]['task_type']
                    if second_task_type == 'RDMASend' and third_task_type == 'Notify Wait':
                        rdma_send_cost = groupby_thread[start_index]['dur']
                        notify_record_cost = groupby_thread[second_index]['dur']
                        notify_wait_cost = groupby_thread[third_index]['dur']
                        rdma_communication_time += rdma_send_cost + notify_record_cost + notify_wait_cost
                        rdma_communication_wait_time += notify_wait_cost
                        rdma_communication_size += \
                            groupby_thread[start_index]['size'] + groupby_thread[second_index]['size']
                        # Skip the two events that were consumed together with the first one.
                        start_index += 2
                start_index += 1
            rdma_communication_wait_time = rdma_communication_wait_time / 1e3
            rdma_communication_size = rdma_communication_size / 1e3
            rdma_communication_time = rdma_communication_time / 1e3
            # Guard on the time as well: a non-zero size with an all-zero duration used to
            # divide by zero and produce inf, which is not representable in strict JSON.
            # NOTE(review): the scaling here (/ 1e3 on the time) differs from the SDMA
            # bandwidth (* 1e-3 on the quotient); kept unchanged - confirm the intended unit.
            rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3) \
                if rdma_communication_size and rdma_communication_time else 0
            thread_information.append(
                [rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time])
        if len(thread_information) > 1:
            thread_information = np.sum(thread_information, axis=0).tolist()

        return thread_information

    def parse(self):
        """Analyse the original hccl files and fill ``self.hccl_raw``.

        Every (model, iteration) pair found in the data contributes one entry. After
        sorting by iteration, a copy of the last entry is appended whose iteration
        fields are replaced by '-'. Nothing is appended when no hccl data is found.
        """
        hccl_data = []
        for hccl_file in find_files(self.root_path, "hccl_*.json"):
            with open(hccl_file) as fr:
                hccl_data.append(self._original_data_analyse(json.load(fr)))
        if not hccl_data:
            # np.concatenate raises on an empty sequence; there is simply nothing to analyse.
            logging.warning('No hccl_*.json file found in %s, skip hccl analyse.', self.root_path)
            return
        hccl_data = np.concatenate(hccl_data)

        for model_id in np.unique(hccl_data['model_id']):
            hccl_data_model = hccl_data[hccl_data['model_id'] == model_id]
            for iteration_id in np.unique(hccl_data_model['iteration_id']):
                hccl_data_model_iteration = hccl_data_model[hccl_data_model['iteration_id'] == iteration_id]

                # Events without a task type are the operator level entries, the others
                # are the detailed tasks.
                is_abstract = hccl_data_model_iteration['task_type'] == ''
                hccl_abstract_data = np.sort(hccl_data_model_iteration[is_abstract], order='ts')
                hccl_detail_data = np.sort(hccl_data_model_iteration[~is_abstract], order='ts')

                if hccl_abstract_data.size:
                    # Tag every task with the last path component of the name of the latest
                    # operator level entry that starts at or before the task.
                    tag = np.searchsorted(hccl_abstract_data['ts'], hccl_detail_data['ts'], side='right') - 1
                    hccl_detail_data['tag'] = [x[-1] for x in
                                               np.char.split(hccl_abstract_data[tag]['name'].astype(str), sep='/')]

                self.hccl_raw.append(self._iteration_analyse(hccl_detail_data, iteration_id))

        if not self.hccl_raw:
            return
        self.hccl_raw = sorted(self.hccl_raw, key=lambda x: x[0])
        # NOTE(review): the trailing '-' entry is a copy of the last iteration; presumably it
        # is consumed as the summary row - confirm against the reader of hccl_raw.csv.
        summary = copy.deepcopy(self.hccl_raw[-1])
        summary[0] = '-'
        for value in summary[4].values():
            value[0] = '-'
        self.hccl_raw.append(summary)

    def write(self, hccl_raw_path):
        """
        Write the parsed hccl data to a csv file.

        The link information and the communication operator cost columns are stored as
        JSON strings. ``self.hccl_raw`` itself is left untouched, so the method can be
        called more than once.

        Args:
            hccl_raw_path(str): hccl_raw.csv path.

        Raises:
            ProfilerIOException: If the file can not be written.
        """
        try:
            with os.fdopen(os.open(hccl_raw_path,
                                   os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR), 'w',
                           newline='') as hccl_row:
                writer = csv.writer(hccl_row)
                writer.writerow(
                    ['step_num', 'communication_cost', 'wait_cost', 'link_info', 'communication_operator_cost'])
                for row in self.hccl_raw:
                    # Encode on a copy: encoding in place would double-encode on a second call.
                    csv_row = list(row)
                    csv_row[3] = json.dumps(csv_row[3])
                    csv_row[4] = json.dumps(csv_row[4])
                    writer.writerow(csv_row)
        except (IOError, OSError) as err:
            logging.critical('Error occurred when write hccl raw file: %s', err)
            raise ProfilerIOException() from err
        if os.path.exists(hccl_raw_path):
            os.chmod(hccl_raw_path, stat.S_IREAD | stat.S_IWRITE)

    def _original_data_analyse(self, original_data):
        """Convert the raw json events into a structured array.

        Only events with ``ph == 'X'`` and a 'model id' argument are kept. An event is
        attributed to the first iteration of its model whose 'Iteration End' is not
        smaller than the event end time scaled by 1e-3. Events of unknown models and
        events that end after the last iteration are skipped.

        Args:
            original_data (list[dict]): Content of one hccl json file.

        Returns:
            numpy.ndarray, the kept events laid out as ``self.hccl_data_df``.
        """
        groups_steptrace = {
            model_id: np.sort(self.steptrace[self.steptrace['Model ID'] == model_id], order='Iteration ID')
            for model_id in np.unique(self.steptrace['Model ID'])
        }

        target_data = []
        out_of_range = 0
        for row in original_data:
            args = row.get('args', {})
            model_id = args.get('model id')
            if row.get('ph') != 'X' or model_id is None:
                continue
            steptrace = groups_steptrace.get(model_id, None)
            if steptrace is None:
                logging.warning('Could not find model: %s in hccl json, skip.', model_id)
                continue

            ts = row.get('ts')
            dur = row.get('dur')
            te = ts + dur

            src_rank = args.get('src rank', 0)
            dst_rank = args.get('dst rank', 0)
            if src_rank == self._INVALID_RANK:
                src_rank = dst_rank
            if dst_rank == self._INVALID_RANK:
                dst_rank = src_rank
            transport_type = args.get('transport type', '')
            if transport_type == 'LOCAL':
                src_rank, dst_rank = dst_rank, src_rank
            link_info = str(src_rank) + '-' + str(dst_rank)

            # The size is either an int or a hexadecimal string.
            size = args.get('size(Byte)', 0)
            size = size if isinstance(size, int) else int(size, 16)

            tag = np.searchsorted(steptrace['Iteration End'], te * 1e-3, side='left')
            if tag >= steptrace.size:
                # The event ends after the last recorded iteration; indexing would fail.
                out_of_range += 1
                continue
            iteration_id = steptrace[tag]['Iteration ID']
            target_data.append(
                (model_id, iteration_id, row.get('name'), row.get('pid'), row.get('tid'),
                 ts, te, dur, row.get('ph'), args.get('task type', ''),
                 link_info, transport_type, size, -1))

        if out_of_range:
            logging.warning('Skip %d hccl events that end after the last iteration.', out_of_range)

        return np.array(target_data, dtype=self.hccl_data_df)

    def _iteration_analyse(self, hccl_detail_data, iteration):
        """Build the ``hccl_raw`` entry of one iteration.

        Returns:
            list, [iteration, communication_cost, wait_cost, link_info, communication_operator_cost].
        """
        communication_cost, wait_cost = self._cost_analyse(hccl_detail_data)
        link_info = self._link_info_analyse(hccl_detail_data)
        communication_operator_cost = self._communication_operator_cost_analyse(hccl_detail_data, iteration)
        return [iteration, communication_cost, wait_cost, link_info, communication_operator_cost]

    def _transport_analyse(self, groupby_link_info):
        """Summarise the events of one link per transport type.

        Returns:
            dict, maps 'SDMA' and/or 'RDMA' to their statistics; other transport types
            and transport types without events are left out.
        """
        transport_information = {}
        for transport_index in np.unique(groupby_link_info['transport_type']):
            groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
            if transport_index == 'SDMA':
                transport_information['SDMA'] = self._sdma_analyse(groupby_transport)
            elif transport_index == 'RDMA':
                transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
        return transport_information

    def _link_info_analyse(self, hccl_detail_data):
        """Summarise the events of one iteration per link.

        Events whose task type is 'Notify Record' are ignored.

        Returns:
            dict, maps every link name to its per transport type statistics.
        """
        groupby_iteration = hccl_detail_data[hccl_detail_data['task_type'] != 'Notify Record']
        link_info_information = {}
        for link_info_index in np.unique(groupby_iteration['link_info']):
            groupby_link_info = groupby_iteration[groupby_iteration['link_info'] == link_info_index]
            link_info_information[link_info_index] = self._transport_analyse(groupby_link_info)
        return link_info_information

    def _communication_operator_cost_analyse(self, hccl_detail_data, iteration_index):
        """Summarise the events of one iteration per communication operator (tag).

        Events whose task type is 'Notify Record' are ignored. Every link of the
        iteration appears in the result of every tag; a link that the tag did not use
        maps to an empty dict.

        Returns:
            dict, maps every tag to
            [str(iteration_index), communication_cost, wait_cost, link_info].
        """
        groupby_iteration = hccl_detail_data[hccl_detail_data['task_type'] != 'Notify Record']
        # The links do not depend on the tag, so they are collected once.
        link_groups = np.unique(groupby_iteration['link_info'])
        tag_information = {}
        for tag_index in np.unique(groupby_iteration['tag']):
            groupby_tag = groupby_iteration[groupby_iteration['tag'] == tag_index]
            link_info_information = {}
            for link_info_index in link_groups:
                groupby_link_info = groupby_tag[groupby_tag['link_info'] == link_info_index]
                link_info_information[link_info_index] = self._transport_analyse(groupby_link_info)
            communication_cost, wait_cost = self._cost_analyse(groupby_tag)
            tag_information[tag_index] = [
                str(iteration_index), communication_cost, wait_cost,
                link_info_information
            ]
        return tag_information
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
class AscendHCCLGeneratorOld:
|
|
315
|
+
"""Generate ascend hccl data from files."""
|
|
316
|
+
|
|
85
317
|
def __init__(self, source_path):
|
|
86
318
|
self.root_path = source_path
|
|
87
319
|
self.hccl_raw = []
|
|
@@ -15,6 +15,8 @@
|
|
|
15
15
|
"""msprof PROF data export api file"""
|
|
16
16
|
import os
|
|
17
17
|
import shutil
|
|
18
|
+
import json
|
|
19
|
+
from json import JSONDecodeError
|
|
18
20
|
from collections import defaultdict
|
|
19
21
|
from subprocess import CalledProcessError, TimeoutExpired
|
|
20
22
|
from subprocess import Popen, PIPE
|
|
@@ -39,7 +41,7 @@ class AscendMsprofExporter:
|
|
|
39
41
|
>> ms_exporter = AscendMsprofExporter("path/to/profiler/data")
|
|
40
42
|
>> ms_exporter.export(start_time)
|
|
41
43
|
"""
|
|
42
|
-
|
|
44
|
+
DRV_VERSION = 467473
|
|
43
45
|
_hiai_msprof_tail = "Ascend/latest/tools/profiler/bin"
|
|
44
46
|
_msprof_cmd = "msprof"
|
|
45
47
|
_ascend_mark = "Ascend"
|
|
@@ -49,46 +51,79 @@ class AscendMsprofExporter:
|
|
|
49
51
|
_op_summary_mark = "op_summary"
|
|
50
52
|
_op_statistic_mark = "op_statistic"
|
|
51
53
|
|
|
52
|
-
def __init__(self, source_path, time_out=
|
|
54
|
+
def __init__(self, source_path, time_out=3600):
    """Store the export settings and make sure the msprof tool can be found.

    Args:
        source_path (str): Path of the profiling data; its parent directory is
            used as the root directory for the export.
        time_out (int): Timeout passed to every command that is run. Default: 3600.
    """
    self._time_out = time_out
    self.source_path = source_path
    # Parent directory of source_path (PROF*/).
    parent_dir = os.path.join(self.source_path, os.path.pardir)
    self.prof_root_dir = os.path.abspath(parent_dir)

    self._check_msprof_env()
|
|
58
60
|
|
|
61
|
+
def get_drv_version(self):
    """Get the drv_version for choosing the export mode.

    Runs the ``get_msprof_info.py`` script on the ``host`` directory below
    ``self.prof_root_dir`` and parses its output as JSON.

    Returns:
        bool, True when the reported ``drv_version`` is not smaller than
        ``self.DRV_VERSION``. False when the command prints nothing, reports
        status 1, or its result can not be evaluated.
    """
    host_dir = os.path.join(self.prof_root_dir, 'host')
    cmd = ['python',
           '/usr/local/Ascend/latest/tools/profiler/profiler_tool/analysis/interface/get_msprof_info.py',
           '-dir', host_dir]
    try:
        outs, _ = self._run_cmd(cmd)
        if not outs:
            logger.warning('Check the drvVersion can`t find the result, use single export mode instead.')
            return False
        result = json.loads(outs)
        logger.info('get drv_version result is : %s', result)
        if result.get('status', 1) == 1:
            return False
        drv_version = result.get('data', {}).get('version_info', {}).get('drv_version', 0)
        # A non-numeric version (e.g. a string or null) makes the comparison raise
        # TypeError, which is handled below like every other unusable result.
        return drv_version >= self.DRV_VERSION
    except (RuntimeError, JSONDecodeError, AttributeError, TypeError) as err:
        logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
        return False
|
|
84
|
+
|
|
59
85
|
def export(self, model_iteration_dict=None):
    """Export the collected PROF data with msprof and check the exported files.

    Args:
        model_iteration_dict (dict): Maps a model id to the iteration ids that are
            exported one by one. When it is empty and ``get_drv_version`` returns a
            false value, it is generated with ``_generate_step_trace``. Default: None.

    Returns:
        bool, True when a single export command for the whole directory was chosen
        because ``get_drv_version`` returned True and no ``model_iteration_dict``
        was given, otherwise False.
    """
    flag = self.get_drv_version()
    if model_iteration_dict or not flag:
        flag = False
        model_iteration_dict = model_iteration_dict or \
            self._generate_step_trace(self.prof_root_dir, self.source_path)

    if not model_iteration_dict:
        # Nothing to iterate over: export the whole directory with one command.
        self._run_cmd(self._msprof_command_generator(self.prof_root_dir))
        self._check_export_files(self.source_path)
        return flag

    for model_id, iteration_ids in model_iteration_dict.items():
        for iteration_id in iteration_ids:
            per_iteration_cmd = self._msprof_command_generator_old(self.prof_root_dir, model_id, iteration_id)
            self._run_cmd(per_iteration_cmd)
    self._check_export_files_old(self.source_path, model_iteration_dict)

    return flag
|
|
108
|
+
|
|
109
|
+
def _run_cmd(self, cmd):
    """Run a command and return what it wrote to stdout and stderr.

    Args:
        cmd (list[str]): The command and its arguments.

    Returns:
        tuple, (outs, errs): the captured stdout and stderr text.

    Raises:
        RuntimeError: If the process can not be started.
        TimeoutError: If the process does not finish within ``self._time_out``.
    """
    try:
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, text=True)
    except (FileNotFoundError, PermissionError, CalledProcessError) as exc:
        raise RuntimeError(exc) from exc
    try:
        outs, errs = proc.communicate(timeout=self._time_out)
    except TimeoutExpired as err:
        proc.kill()
        # Reap the killed child and close its pipes; without this the process
        # stays around as a zombie and the pipe descriptors are leaked.
        proc.communicate()
        msg = "The possible cause is that too much data is collected " \
              "and the export time is too long."
        logger.error(msg)
        raise TimeoutError(msg) from err
    logger.info(outs)
    return outs, errs
|
|
90
125
|
|
|
91
|
-
def
|
|
126
|
+
def _msprof_command_generator_old(self, output, model_id=None, iter_id=None):
|
|
92
127
|
"""msprof export helper"""
|
|
93
128
|
export_cmd = [self._msprof_cmd, "--export=on", "--output={}".format(output)]
|
|
94
129
|
if isinstance(model_id, int) and model_id >= 0:
|
|
@@ -97,6 +132,10 @@ class AscendMsprofExporter:
|
|
|
97
132
|
export_cmd.append("--iteration-id={}".format(iter_id))
|
|
98
133
|
return export_cmd
|
|
99
134
|
|
|
135
|
+
def _msprof_command_generator(self, output):
|
|
136
|
+
"""msprof export helper"""
|
|
137
|
+
return [self._msprof_cmd, "--export=on", "--output={}".format(output)]
|
|
138
|
+
|
|
100
139
|
def _check_msprof_env(self):
|
|
101
140
|
"""Check the existence of msprof binary tool"""
|
|
102
141
|
|
|
@@ -109,7 +148,7 @@ class AscendMsprofExporter:
|
|
|
109
148
|
return False
|
|
110
149
|
|
|
111
150
|
msprof_cmd = ["which", self._msprof_cmd]
|
|
112
|
-
outs = self._run_cmd(msprof_cmd
|
|
151
|
+
outs, _ = self._run_cmd(msprof_cmd)
|
|
113
152
|
if outs != "":
|
|
114
153
|
return
|
|
115
154
|
logger.warning("[Profiler]The msprof command was not found. Searching from environment variables...")
|
|
@@ -143,8 +182,7 @@ class AscendMsprofExporter:
|
|
|
143
182
|
summary_path = os.path.join(device_path, self._summary_dir)
|
|
144
183
|
timeline_path = os.path.join(device_path, self._timeline_dir)
|
|
145
184
|
|
|
146
|
-
|
|
147
|
-
self._run_cmd(msprof_export_cmd)
|
|
185
|
+
self._run_cmd(self._msprof_command_generator_old(prof_path))
|
|
148
186
|
|
|
149
187
|
if not os.path.isdir(summary_path):
|
|
150
188
|
msg = "Path {} is not a existing directory. Make sure there is " \
|
|
@@ -158,16 +196,15 @@ class AscendMsprofExporter:
|
|
|
158
196
|
return None
|
|
159
197
|
|
|
160
198
|
step_trace = defaultdict(list)
|
|
161
|
-
with open(step_trace_file, newline='', mode='r') as csvfile:
|
|
199
|
+
with os.fdopen(os.open(step_trace_file, os.O_RDONLY, 0o600), newline='', mode='r') as csvfile:
|
|
162
200
|
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
|
|
163
|
-
|
|
164
|
-
for index, value in enumerate(header):
|
|
201
|
+
for index, value in enumerate(next(reader)):
|
|
165
202
|
if value == 'Model ID':
|
|
166
|
-
|
|
203
|
+
model_id = index
|
|
167
204
|
if value == 'Iteration ID':
|
|
168
|
-
|
|
205
|
+
iteration_id = index
|
|
169
206
|
for row in reader:
|
|
170
|
-
step_trace[int(row[
|
|
207
|
+
step_trace[int(row[model_id])].append(int(row[iteration_id]))
|
|
171
208
|
|
|
172
209
|
if os.path.isdir(summary_path):
|
|
173
210
|
shutil.rmtree(summary_path)
|
|
@@ -176,16 +213,15 @@ class AscendMsprofExporter:
|
|
|
176
213
|
|
|
177
214
|
return step_trace
|
|
178
215
|
|
|
179
|
-
def
|
|
216
|
+
def _check_export_files_old(self, source_path, step_trace):
|
|
180
217
|
"""Check the existence of op_summary & op_statistic files."""
|
|
181
218
|
summary_path = os.path.join(source_path, self._summary_dir)
|
|
182
219
|
if not os.path.isdir(summary_path):
|
|
183
220
|
raise RuntimeError("Path {} is not a existing directory.".format(summary_path))
|
|
184
|
-
summary_file_list = os.listdir(summary_path)
|
|
185
221
|
op_summary = set()
|
|
186
222
|
op_statistic = set()
|
|
187
223
|
|
|
188
|
-
for summary_file in
|
|
224
|
+
for summary_file in os.listdir(summary_path):
|
|
189
225
|
if summary_file.startswith(self._op_summary_mark):
|
|
190
226
|
op_summary.add(summary_file)
|
|
191
227
|
elif summary_file.startswith(self._op_statistic_mark):
|
|
@@ -196,18 +232,25 @@ class AscendMsprofExporter:
|
|
|
196
232
|
if not op_statistic:
|
|
197
233
|
raise RuntimeError("The op_statistics file was not found, perhaps the original data was not collected.")
|
|
198
234
|
|
|
199
|
-
|
|
235
|
+
logger.info("Finish checking files.")
|
|
200
236
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
237
|
+
def _check_export_files(self, source_path):
    """Check the existence of op_summary & op_statistic files.

    Args:
        source_path (str): Directory that contains the summary directory.

    Raises:
        RuntimeError: If the summary directory does not exist or holds no file
            whose name starts with the op_summary or the op_statistic mark.
    """
    summary_path = os.path.join(source_path, self._summary_dir)
    if not os.path.isdir(summary_path):
        raise RuntimeError("Path {} is not a existing directory.".format(summary_path))

    found_summary = False
    found_statistic = False
    for file_name in os.listdir(summary_path):
        # A name is classified once, the op_summary mark taking precedence.
        if file_name.startswith(self._op_summary_mark):
            found_summary = True
        elif file_name.startswith(self._op_statistic_mark):
            found_statistic = True

    if not found_summary:
        raise RuntimeError("The op_summary file was not found, perhaps the original data was not collected.")
    if not found_statistic:
        raise RuntimeError("The op_statistics file was not found, perhaps the original data was not collected.")
    logger.info("Finish checking files.")
|