mindspore 2.2.0__cp37-cp37m-manylinux1_x86_64.whl → 2.2.11__cp37-cp37m-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/_akg/akg/composite/build_module.py +104 -20
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
- mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
- mindspore/_akg/akg/utils/kernel_exec.py +41 -15
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
- mindspore/_akg/akg/utils/util.py +56 -1
- mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +3 -3
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/splitter.py +3 -2
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
- mindspore/_extends/parse/__init__.py +3 -2
- mindspore/_extends/parse/parser.py +6 -1
- mindspore/_extends/parse/standard_method.py +14 -11
- mindspore/_extends/remote/kernel_build_server.py +2 -1
- mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/_utils.py +16 -0
- mindspore/common/api.py +1 -1
- mindspore/common/auto_dynamic_shape.py +81 -85
- mindspore/common/dump.py +1 -1
- mindspore/common/tensor.py +3 -20
- mindspore/config/op_info.config +1 -1
- mindspore/context.py +11 -4
- mindspore/dataset/engine/cache_client.py +8 -5
- mindspore/dataset/engine/datasets_standard_format.py +5 -0
- mindspore/dataset/vision/transforms.py +21 -21
- mindspore/experimental/optim/adam.py +1 -1
- mindspore/gen_ops.py +1 -1
- mindspore/include/api/model.h +17 -0
- mindspore/include/api/status.h +8 -3
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/nn/cell.py +0 -3
- mindspore/nn/layer/activation.py +4 -5
- mindspore/nn/layer/conv.py +39 -23
- mindspore/nn/layer/flash_attention.py +54 -129
- mindspore/nn/layer/math.py +3 -7
- mindspore/nn/layer/rnn_cells.py +5 -5
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +12 -3
- mindspore/numpy/utils_const.py +5 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
- mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_utils/utils.py +2 -0
- mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
- mindspore/ops/function/array_func.py +10 -7
- mindspore/ops/function/grad/grad_func.py +0 -1
- mindspore/ops/function/nn_func.py +98 -9
- mindspore/ops/function/random_func.py +2 -1
- mindspore/ops/op_info_register.py +24 -21
- mindspore/ops/operations/__init__.py +6 -2
- mindspore/ops/operations/_grad_ops.py +25 -6
- mindspore/ops/operations/_inner_ops.py +155 -23
- mindspore/ops/operations/array_ops.py +9 -7
- mindspore/ops/operations/comm_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +85 -68
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +7 -6
- mindspore/ops/operations/nn_ops.py +193 -49
- mindspore/parallel/_parallel_serialization.py +10 -3
- mindspore/parallel/_tensor.py +4 -1
- mindspore/parallel/checkpoint_transform.py +13 -2
- mindspore/parallel/shard.py +17 -10
- mindspore/profiler/common/util.py +1 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
- mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
- mindspore/profiler/parser/ascend_op_generator.py +1 -1
- mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
- mindspore/profiler/parser/base_timeline_generator.py +1 -1
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
- mindspore/profiler/parser/framework_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +19 -0
- mindspore/profiler/profiling.py +46 -24
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/parsers/for_parser.py +7 -7
- mindspore/rewrite/parsers/module_parser.py +4 -4
- mindspore/rewrite/symbol_tree.py +1 -4
- mindspore/run_check/_check_version.py +5 -3
- mindspore/safeguard/rewrite_obfuscation.py +52 -28
- mindspore/scipy/ops.py +55 -5
- mindspore/scipy/optimize/__init__.py +3 -2
- mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
- mindspore/train/callback/_summary_collector.py +1 -1
- mindspore/train/dataset_helper.py +1 -0
- mindspore/train/model.py +2 -2
- mindspore/train/serialization.py +97 -11
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +23 -7
- mindspore/version.py +1 -1
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
- mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
|
@@ -20,7 +20,7 @@ import os
|
|
|
20
20
|
import numpy as np
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class
|
|
23
|
+
class AscendMsprofDataGeneratorOld:
|
|
24
24
|
"""Generate ascend data from files."""
|
|
25
25
|
|
|
26
26
|
def __init__(self, source_path):
|
|
@@ -47,10 +47,15 @@ class AscendMsprofDataGenerator:
|
|
|
47
47
|
'Output Data Types': {'index': self.invalid_index, 'dtype': ('Output Data Types', object)},
|
|
48
48
|
'Output Formats': {'index': self.invalid_index, 'dtype': ('Output Formats', object)},
|
|
49
49
|
}
|
|
50
|
-
self.
|
|
50
|
+
self.op_summaryA_extend_name = {
|
|
51
51
|
'vector_fops': {'index': self.invalid_index, 'dtype': ('vector_fops', float)},
|
|
52
52
|
'cube_fops': {'index': self.invalid_index, 'dtype': ('cube_fops', float)},
|
|
53
53
|
}
|
|
54
|
+
|
|
55
|
+
self.op_summaryB_extend_name = {
|
|
56
|
+
'aiv_vector_fops': {'index': self.invalid_index, 'dtype': ('vector_fops', float)},
|
|
57
|
+
'aic_cube_fops': {'index': self.invalid_index, 'dtype': ('cube_fops', float)},
|
|
58
|
+
}
|
|
54
59
|
self.op_summary_name = None
|
|
55
60
|
|
|
56
61
|
self.op_statistic_name = {
|
|
@@ -110,10 +115,15 @@ class AscendMsprofDataGenerator:
|
|
|
110
115
|
iteration = int(file.split('_')[-1].split('.')[0])
|
|
111
116
|
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
|
|
112
117
|
header = next(reader)
|
|
113
|
-
self.link_index_with_name(header, self.op_summary_basis_name)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
118
|
+
flag = self.link_index_with_name(header, self.op_summary_basis_name)
|
|
119
|
+
if not flag:
|
|
120
|
+
raise RuntimeError("Read op summary failed. The file is missing basic fields.")
|
|
121
|
+
extend_flag_A = self.link_index_with_name(header, self.op_summaryA_extend_name)
|
|
122
|
+
extend_flag_B = self.link_index_with_name(header, self.op_summaryB_extend_name)
|
|
123
|
+
if extend_flag_A:
|
|
124
|
+
self.op_summary_name = {**self.op_summary_basis_name, **self.op_summaryA_extend_name}
|
|
125
|
+
elif extend_flag_B:
|
|
126
|
+
self.op_summary_name = {**self.op_summary_basis_name, **self.op_summaryB_extend_name}
|
|
117
127
|
else:
|
|
118
128
|
self.op_summary_name = self.op_summary_basis_name
|
|
119
129
|
self.op_summary_name['Iteration ID'] = {'index': -1, 'dtype': ('Iteration ID', object)}
|
|
@@ -137,7 +147,9 @@ class AscendMsprofDataGenerator:
|
|
|
137
147
|
with open(file, newline='') as csvfile:
|
|
138
148
|
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
|
|
139
149
|
header = next(reader)
|
|
140
|
-
self.link_index_with_name(header, self.op_statistic_name)
|
|
150
|
+
flag = self.link_index_with_name(header, self.op_statistic_name)
|
|
151
|
+
if not flag:
|
|
152
|
+
raise RuntimeError("Read op summary failed. The file is missing basic fields.")
|
|
141
153
|
for row in reader:
|
|
142
154
|
row = [row[index.get('index')] for index in self.op_statistic_name.values()]
|
|
143
155
|
row = ['0' if i == 'N/A' else i for i in row]
|
|
@@ -155,7 +167,9 @@ class AscendMsprofDataGenerator:
|
|
|
155
167
|
with open(file, newline='') as csvfile:
|
|
156
168
|
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
|
|
157
169
|
header = next(reader)
|
|
158
|
-
self.link_index_with_name(header, self.steptrace_name)
|
|
170
|
+
flag = self.link_index_with_name(header, self.steptrace_name)
|
|
171
|
+
if not flag:
|
|
172
|
+
raise RuntimeError("Read op summary failed. The file is missing basic fields.")
|
|
159
173
|
for row in reader:
|
|
160
174
|
rows = [row[index.get('index')] for index in self.steptrace_name.values()]
|
|
161
175
|
if row[9:]:
|
|
@@ -182,7 +196,7 @@ class AscendMsprofDataGenerator:
|
|
|
182
196
|
for i in range(len(self.steptrace_name), len(header), 2):
|
|
183
197
|
name = f'hccl_{i}'
|
|
184
198
|
self.steptrace_name[name] = {'index': i, 'dtype': (name, float)}
|
|
185
|
-
self.steptrace_name[f'{name} duration'] = {'index': i+1, 'dtype': (f'{name} duration', float)}
|
|
199
|
+
self.steptrace_name[f'{name} duration'] = {'index': i + 1, 'dtype': (f'{name} duration', float)}
|
|
186
200
|
|
|
187
201
|
steptrace_dt = np.dtype([value['dtype'] for value in self.steptrace_name.values()])
|
|
188
202
|
|
|
@@ -197,3 +211,176 @@ class AscendMsprofDataGenerator:
|
|
|
197
211
|
|
|
198
212
|
for name in self.steptrace.dtype.names[9:]:
|
|
199
213
|
self.steptrace[name] = self.steptrace[name] * 1e-3
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class AscendMsprofDataGenerator:
|
|
217
|
+
"""Generate ascend data from files."""
|
|
218
|
+
|
|
219
|
+
def __init__(self, source_path):
|
|
220
|
+
self.source_path = source_path
|
|
221
|
+
self.op_summary = None
|
|
222
|
+
self.op_statistic = None
|
|
223
|
+
self.steptrace = []
|
|
224
|
+
|
|
225
|
+
self.op_summary_type = [
|
|
226
|
+
('Model ID', int),
|
|
227
|
+
('Task ID', int),
|
|
228
|
+
('Stream ID', int),
|
|
229
|
+
('Op Name', object),
|
|
230
|
+
('Op Type', object),
|
|
231
|
+
('Task Type', object),
|
|
232
|
+
('Task Start Time', float),
|
|
233
|
+
('Task Duration', float),
|
|
234
|
+
('Task Wait Time', float),
|
|
235
|
+
('Input Shapes', object),
|
|
236
|
+
('Input Data Types', object),
|
|
237
|
+
('Input Formats', object),
|
|
238
|
+
('Output Shapes', object),
|
|
239
|
+
('Output Data Types', object),
|
|
240
|
+
('Output Formats', object)
|
|
241
|
+
]
|
|
242
|
+
|
|
243
|
+
self.op_statistic_type = [
|
|
244
|
+
('Op Type', object),
|
|
245
|
+
('Count', int),
|
|
246
|
+
('Total Time', float),
|
|
247
|
+
]
|
|
248
|
+
|
|
249
|
+
self.steptrace_type = [
|
|
250
|
+
('Iteration ID', int),
|
|
251
|
+
('FP Start', float),
|
|
252
|
+
('BP End', float),
|
|
253
|
+
('Iteration End', float),
|
|
254
|
+
('Iteration Time', float),
|
|
255
|
+
('FP to BP Time', float),
|
|
256
|
+
('Iteration Refresh', float),
|
|
257
|
+
('Data Aug Bound', float),
|
|
258
|
+
('Model ID', int),
|
|
259
|
+
]
|
|
260
|
+
|
|
261
|
+
@staticmethod
|
|
262
|
+
def find_files(directory, pattern):
|
|
263
|
+
"""Find files with feature 'pattern' from the directory"""
|
|
264
|
+
|
|
265
|
+
for root, _, files in os.walk(directory):
|
|
266
|
+
files.sort(key=lambda x: os.path.getctime(os.path.join(directory, x)))
|
|
267
|
+
for basename in files:
|
|
268
|
+
if fnmatch.fnmatch(basename, pattern):
|
|
269
|
+
filename = os.path.join(root, basename)
|
|
270
|
+
yield filename
|
|
271
|
+
|
|
272
|
+
def parse(self):
|
|
273
|
+
"""read msprof data generate DataFrame data"""
|
|
274
|
+
self._read_op_summary()
|
|
275
|
+
|
|
276
|
+
self._read_op_statistic()
|
|
277
|
+
|
|
278
|
+
self._read_steptrace()
|
|
279
|
+
|
|
280
|
+
return self.op_summary, self.op_statistic, self.steptrace
|
|
281
|
+
|
|
282
|
+
def _read_op_summary(self):
|
|
283
|
+
"""read op summary to memory"""
|
|
284
|
+
op_summary = []
|
|
285
|
+
for file in self.find_files(self.source_path, "op_summary*.csv"):
|
|
286
|
+
with open(file, newline='') as csvfile:
|
|
287
|
+
reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
|
|
288
|
+
for row in reader:
|
|
289
|
+
vector_fops = row.get('vector_fops', None)
|
|
290
|
+
cube_fops = row.get('cube_fops', None)
|
|
291
|
+
aiv_vector_fops = row.get('aiv_vector_fops', None)
|
|
292
|
+
aic_cube_fops = row.get('aic_cube_fops', None)
|
|
293
|
+
|
|
294
|
+
new_row = [
|
|
295
|
+
row.get('Model ID'),
|
|
296
|
+
row.get('Task ID'),
|
|
297
|
+
row.get('Stream ID'),
|
|
298
|
+
row.get('Op Name'),
|
|
299
|
+
row.get('OP Type'),
|
|
300
|
+
row.get('Task Type'),
|
|
301
|
+
row.get('Task Start Time(us)'),
|
|
302
|
+
row.get('Task Duration(us)'),
|
|
303
|
+
row.get('Task Wait Time(us)'),
|
|
304
|
+
row.get('Input Shapes'),
|
|
305
|
+
row.get('Input Data Types'),
|
|
306
|
+
row.get('Input Formats'),
|
|
307
|
+
row.get('Output Shapes'),
|
|
308
|
+
row.get('Output Data Types'),
|
|
309
|
+
row.get('Output Formats')
|
|
310
|
+
]
|
|
311
|
+
|
|
312
|
+
if vector_fops is not None and cube_fops is not None:
|
|
313
|
+
new_row.append(vector_fops)
|
|
314
|
+
new_row.append(cube_fops)
|
|
315
|
+
|
|
316
|
+
elif aic_cube_fops is not None and aiv_vector_fops is not None:
|
|
317
|
+
new_row.append(aiv_vector_fops)
|
|
318
|
+
new_row.append(aic_cube_fops)
|
|
319
|
+
|
|
320
|
+
new_row = tuple(['0' if d == 'N/A' else d for d in new_row])
|
|
321
|
+
op_summary.append(new_row)
|
|
322
|
+
break
|
|
323
|
+
|
|
324
|
+
if op_summary and len(op_summary[0]) > len(self.op_summary_type):
|
|
325
|
+
self.op_summary_type.extend([
|
|
326
|
+
('vector_fops', float),
|
|
327
|
+
('cube_fops', float)
|
|
328
|
+
])
|
|
329
|
+
op_summary_dt = np.dtype(self.op_summary_type)
|
|
330
|
+
|
|
331
|
+
self.op_summary = np.array(op_summary, dtype=op_summary_dt)
|
|
332
|
+
self.op_summary['Task Start Time'] *= 1e-3
|
|
333
|
+
self.op_summary['Task Duration'] *= 1e-3
|
|
334
|
+
self.op_summary['Task Wait Time'] *= 1e-3
|
|
335
|
+
|
|
336
|
+
def _read_op_statistic(self):
|
|
337
|
+
"""read op statistic to memory"""
|
|
338
|
+
op_statistic = []
|
|
339
|
+
for file in self.find_files(self.source_path, "op_statistic*.csv"):
|
|
340
|
+
with open(file, newline='') as csvfile:
|
|
341
|
+
reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
|
|
342
|
+
for row in reader:
|
|
343
|
+
new_row = (
|
|
344
|
+
row.get('OP Type'),
|
|
345
|
+
row.get('Count'),
|
|
346
|
+
row.get('Total Time(us)'),
|
|
347
|
+
)
|
|
348
|
+
new_row = tuple(['0' if d == 'N/A' else d for d in new_row])
|
|
349
|
+
op_statistic.append(new_row)
|
|
350
|
+
break
|
|
351
|
+
|
|
352
|
+
op_statistic_dt = np.dtype(self.op_statistic_type)
|
|
353
|
+
self.op_statistic = np.array(op_statistic, dtype=op_statistic_dt)
|
|
354
|
+
self.op_statistic['Total Time'] *= 1e-3
|
|
355
|
+
|
|
356
|
+
def _read_steptrace(self):
|
|
357
|
+
"""read steptrace to memory"""
|
|
358
|
+
steptrace = []
|
|
359
|
+
for file in self.find_files(self.source_path, "step_trace*.csv"):
|
|
360
|
+
with open(file, newline='') as csvfile:
|
|
361
|
+
reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
|
|
362
|
+
for row in reader:
|
|
363
|
+
new_row = [
|
|
364
|
+
row.get('Iteration ID'),
|
|
365
|
+
row.get('FP Start(us)'),
|
|
366
|
+
row.get('BP End(us)'),
|
|
367
|
+
row.get('Iteration End(us)'),
|
|
368
|
+
row.get('Iteration Time(us)'),
|
|
369
|
+
row.get('FP to BP Time(us)'),
|
|
370
|
+
row.get('Iteration Refresh(us)'),
|
|
371
|
+
row.get('Data Aug Bound(us)'),
|
|
372
|
+
row.get('Model ID'),
|
|
373
|
+
]
|
|
374
|
+
new_row = ['0' if i == 'N/A' else i for i in new_row]
|
|
375
|
+
steptrace.append(tuple(new_row))
|
|
376
|
+
|
|
377
|
+
steptrace_dt = np.dtype(self.steptrace_type)
|
|
378
|
+
|
|
379
|
+
self.steptrace = np.array(steptrace, dtype=steptrace_dt)
|
|
380
|
+
self.steptrace['FP Start'] = self.steptrace['FP Start'] * 1e-3
|
|
381
|
+
self.steptrace['BP End'] = self.steptrace['BP End'] * 1e-3
|
|
382
|
+
self.steptrace['Iteration End'] = self.steptrace['Iteration End'] * 1e-3
|
|
383
|
+
self.steptrace['Iteration Time'] = self.steptrace['Iteration Time'] * 1e-3
|
|
384
|
+
self.steptrace['FP to BP Time'] = self.steptrace['FP to BP Time'] * 1e-3
|
|
385
|
+
self.steptrace['Iteration Refresh'] = self.steptrace['Iteration Refresh'] * 1e-3
|
|
386
|
+
self.steptrace['Data Aug Bound'] = self.steptrace['Data Aug Bound'] * 1e-3
|
|
@@ -124,7 +124,7 @@ class AscendOPGenerator:
|
|
|
124
124
|
writer.writerows(self.aicpu_detail.tolist())
|
|
125
125
|
except (IOError, OSError) as err:
|
|
126
126
|
logging.critical('Errot occurred when write aicpu detail file: %s', err)
|
|
127
|
-
raise ProfilerIOException()
|
|
127
|
+
raise ProfilerIOException() from err
|
|
128
128
|
if os.path.exists(aicpu_intermediate_detail_path):
|
|
129
129
|
os.chmod(aicpu_intermediate_detail_path, stat.S_IREAD | stat.S_IWRITE)
|
|
130
130
|
|
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
# ============================================================================
|
|
15
15
|
"""The integrator for integrating parsed profiling files."""
|
|
16
|
-
import os
|
|
17
16
|
|
|
18
17
|
import numpy as np
|
|
19
18
|
from mindspore import log as logger
|
|
@@ -22,7 +21,6 @@ from mindspore.profiler.parser.base_timeline_generator import BaseTimelineGenera
|
|
|
22
21
|
from mindspore.profiler.parser.container import TimelineContainer
|
|
23
22
|
from mindspore.profiler.parser.cpu_gpu_timeline_generator import CpuTimelineGenerator
|
|
24
23
|
from mindspore.profiler.parser.integrator import DeviceTarget
|
|
25
|
-
from mindspore.profiler.parser.op_intermediate_parser import OPIntermediateParser
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
@@ -51,16 +49,6 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
51
49
|
[('Op Name', object), ('Stream ID', int), ('Task Start Time', float), ('Task Duration', float),
|
|
52
50
|
('pid', int)])
|
|
53
51
|
|
|
54
|
-
@staticmethod
|
|
55
|
-
def _get_all_reduce_names(communication_info):
|
|
56
|
-
names = []
|
|
57
|
-
for info in communication_info:
|
|
58
|
-
# all_reduce_name format: stream_stream_id_stream_op_index_opname
|
|
59
|
-
all_reduce_name = info[0][info[0].rindex('_') + 1:]
|
|
60
|
-
if all_reduce_name not in names:
|
|
61
|
-
names.append(all_reduce_name)
|
|
62
|
-
return names
|
|
63
|
-
|
|
64
52
|
def init_timeline(self, op_summary, steptrace):
|
|
65
53
|
"""
|
|
66
54
|
Init timeline metadata, adding all collected info.
|
|
@@ -160,49 +148,19 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
160
148
|
self._timeline_meta = self._format_meta_data_list
|
|
161
149
|
|
|
162
150
|
# Update timeline summary info
|
|
163
|
-
timeline_summary = op_summary[
|
|
164
|
-
'Op Name', 'Stream ID', 'Task Duration']]
|
|
151
|
+
timeline_summary = op_summary[['Op Name', 'Stream ID', 'Task Duration']]
|
|
165
152
|
self._timeline_summary['total_time'] = np.sum(timeline_summary['Task Duration'])
|
|
166
153
|
self._timeline_summary['num_of_streams'] = int(
|
|
167
154
|
len(np.unique(timeline_summary['Stream ID'], return_counts=True)[0]))
|
|
168
155
|
self._timeline_summary['num_of_ops'] = int(len(np.unique(timeline_summary['Op Name'], return_counts=True)[0]))
|
|
169
156
|
self._timeline_summary['op_exe_times'] = int(len(timeline_summary))
|
|
170
|
-
self._timeline_summary['
|
|
171
|
-
|
|
157
|
+
if self._timeline_summary['op_exe_times'] != 0:
|
|
158
|
+
self._timeline_summary['max_scope_name_num'] = int(np.max(
|
|
159
|
+
[len(x) for x in np.char.split(timeline_summary['Op Name'].astype(str), sep='/')]))
|
|
160
|
+
else:
|
|
161
|
+
self._timeline_summary['max_scope_name_num'] = 0
|
|
172
162
|
logger.info('Finished adding info into timeline...')
|
|
173
163
|
|
|
174
|
-
def init_pynative_timeline(self):
|
|
175
|
-
"""Init timeline for pynative model."""
|
|
176
|
-
timeline_list = OPIntermediateParser(self._profiling_dir, self._rank_id).get_timeline_data()
|
|
177
|
-
cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._rank_id, self._model)
|
|
178
|
-
cpu_timeline_list = cpu_timeline_generator.load_cpu_op_data()
|
|
179
|
-
if cpu_timeline_list:
|
|
180
|
-
self._pynative_clock_synchronize(cpu_timeline_list)
|
|
181
|
-
timeline_list.extend(cpu_timeline_list)
|
|
182
|
-
|
|
183
|
-
self._register_op_name(timeline_list)
|
|
184
|
-
self._timeline_summary['op_exe_times'] = len(timeline_list)
|
|
185
|
-
self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
|
|
186
|
-
self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num
|
|
187
|
-
self._timeline_summary['num_of_ops'] = len(self._op_name_list)
|
|
188
|
-
|
|
189
|
-
timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
190
|
-
min_cycle_counter = float(timeline_list[0][self._start_time_idx])
|
|
191
|
-
|
|
192
|
-
step_timeline = self._pynative_get_step_timeline_list(timeline_list)
|
|
193
|
-
timeline_list.extend(step_timeline)
|
|
194
|
-
|
|
195
|
-
stream_count_dict = {}
|
|
196
|
-
max_scope_name_num = 0
|
|
197
|
-
for timeline in timeline_list:
|
|
198
|
-
self._parse_timeline_data(timeline, min_cycle_counter)
|
|
199
|
-
self._update_num_of_streams(timeline, stream_count_dict)
|
|
200
|
-
cur_scope_name_num = len(timeline[self._op_name_idx].split('/')) - 1
|
|
201
|
-
max_scope_name_num = max(cur_scope_name_num, max_scope_name_num)
|
|
202
|
-
|
|
203
|
-
self._timeline_summary['max_scope_name_num'] = max_scope_name_num
|
|
204
|
-
self._timeline_summary['num_of_streams'] = len(stream_count_dict)
|
|
205
|
-
|
|
206
164
|
def _parse_timeline_data(self, timeline, min_cycle_counter):
|
|
207
165
|
"""Parse timeline data."""
|
|
208
166
|
# factor to convert the time unit from 1ms to 1us for timeline display
|
|
@@ -233,94 +191,6 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
233
191
|
self._update_format_meta_data(timeline_dict)
|
|
234
192
|
self._timeline_meta.append(timeline_dict)
|
|
235
193
|
|
|
236
|
-
def _get_op_timeline(self, communication_info, source_path):
|
|
237
|
-
"""get ai_core and cpu timeline."""
|
|
238
|
-
all_reduce_names = AscendTimelineGenerator._get_all_reduce_names(communication_info)
|
|
239
|
-
timeline_list = OPIntermediateParser(self._profiling_dir, self._rank_id).get_timeline_data(all_reduce_names)
|
|
240
|
-
for timeline in timeline_list:
|
|
241
|
-
timeline[self._tid_idx] = f"Stream #{timeline[self._tid_idx]}"
|
|
242
|
-
|
|
243
|
-
cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._rank_id, self._model)
|
|
244
|
-
cpu_timeline_list = cpu_timeline_generator.get_timeline_data()
|
|
245
|
-
if cpu_timeline_list:
|
|
246
|
-
self._clock_synchronize_to_device(cpu_timeline_list, source_path)
|
|
247
|
-
timeline_list.extend(cpu_timeline_list)
|
|
248
|
-
timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
249
|
-
self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
|
|
250
|
-
self._timeline_summary['op_exe_times'] = len(timeline_list)
|
|
251
|
-
self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num
|
|
252
|
-
return timeline_list
|
|
253
|
-
|
|
254
|
-
def _clock_synchronize_to_device(self, timeline_list, source_path):
|
|
255
|
-
"""Synchronize the timestamp from host to device."""
|
|
256
|
-
host_start_file_path = os.path.join(source_path, f"host_start.log.{self._device_id}")
|
|
257
|
-
dev_start_file_path = os.path.join(source_path, f"dev_start.log.{self._device_id}")
|
|
258
|
-
host_monotonic = 0
|
|
259
|
-
dev_cntvct = 0
|
|
260
|
-
try:
|
|
261
|
-
with open(host_start_file_path) as f_obj:
|
|
262
|
-
lines = f_obj.readlines()
|
|
263
|
-
for line in lines:
|
|
264
|
-
info = line.strip().split(':')
|
|
265
|
-
if len(info) < 2 or info[0] != "clock_monotonic_raw":
|
|
266
|
-
continue
|
|
267
|
-
host_monotonic = int(info[1])
|
|
268
|
-
break
|
|
269
|
-
|
|
270
|
-
with open(dev_start_file_path) as f_obj:
|
|
271
|
-
lines = f_obj.readlines()
|
|
272
|
-
for line in lines:
|
|
273
|
-
info = line.strip().split(':')
|
|
274
|
-
if len(info) < 2 or info[0] != "cntvct":
|
|
275
|
-
continue
|
|
276
|
-
dev_cntvct = int(info[1])
|
|
277
|
-
break
|
|
278
|
-
except (IOError, OSError) as err:
|
|
279
|
-
logger.critical('Error occurred when read dev_start.log: %s', err)
|
|
280
|
-
raise ProfilerIOException() from err
|
|
281
|
-
if host_monotonic == 0 or dev_cntvct == 0:
|
|
282
|
-
logger.error('Error occurred when read host_monotonic or dev_cntvct time')
|
|
283
|
-
|
|
284
|
-
factor = {"factor_ns_to_ms": 1e-6, "factor_ten_ns_to_ns": 10, "factor_ms_to_ns": 1e6}
|
|
285
|
-
for idx, time_item in enumerate(timeline_list):
|
|
286
|
-
host_time = int(float(time_item[self._start_time_idx]) * factor.get("factor_ms_to_ns"))
|
|
287
|
-
device_time = dev_cntvct * factor.get("factor_ten_ns_to_ns") + (host_time - host_monotonic)
|
|
288
|
-
timeline_list[idx][self._start_time_idx] = device_time * factor.get("factor_ns_to_ms")
|
|
289
|
-
|
|
290
|
-
def _add_framework_info(self, framework_obj_list):
|
|
291
|
-
"""
|
|
292
|
-
Add framework info into timeline metadata.
|
|
293
|
-
|
|
294
|
-
Args:
|
|
295
|
-
framework_obj_list (list): The framework metadata.
|
|
296
|
-
"""
|
|
297
|
-
logger.debug('Start adding framework info into timeline...')
|
|
298
|
-
# Get the framework info that will be written into timeline.
|
|
299
|
-
framework_info_dict = {}
|
|
300
|
-
for framework_obj in framework_obj_list:
|
|
301
|
-
op_name = framework_obj[0]
|
|
302
|
-
op_type = framework_obj[1]
|
|
303
|
-
op_full_name = framework_obj[4]
|
|
304
|
-
op_info = framework_obj[5]
|
|
305
|
-
framework_info = {
|
|
306
|
-
'name': op_name,
|
|
307
|
-
'args': {
|
|
308
|
-
'type': op_type,
|
|
309
|
-
'fullname': op_full_name
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
framework_info.get('args').update(op_info)
|
|
313
|
-
framework_info_dict[op_full_name] = framework_info
|
|
314
|
-
|
|
315
|
-
# Insert framework info into timeline.
|
|
316
|
-
for timeline_item in self._timeline_meta:
|
|
317
|
-
op_full_name = timeline_item.get('name')
|
|
318
|
-
framework_item = framework_info_dict.get(op_full_name)
|
|
319
|
-
if framework_item:
|
|
320
|
-
timeline_item['name'] = framework_item.get('name')
|
|
321
|
-
timeline_item['args'] = framework_item.get('args')
|
|
322
|
-
logger.debug('Finished adding framework info into timeline...')
|
|
323
|
-
|
|
324
194
|
def _produce_two_separated_timeline(self, timeline, op_name):
|
|
325
195
|
"""Produce two separated timeline based on op_name."""
|
|
326
196
|
timeline_include_op_name = []
|
|
@@ -490,52 +360,6 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
490
360
|
|
|
491
361
|
return intersection_segment_display_list
|
|
492
362
|
|
|
493
|
-
def _pynative_get_step_timeline_list(self, timeline_list):
|
|
494
|
-
"""Get step timeline list for pynative model."""
|
|
495
|
-
step_list = []
|
|
496
|
-
# The timeline starts with the GetNext op
|
|
497
|
-
if len(timeline_list) < 2 or 'GetNext' not in timeline_list[0][self._op_name_idx] and \
|
|
498
|
-
'GetNext' not in timeline_list[1][self._op_name_idx]:
|
|
499
|
-
return step_list
|
|
500
|
-
step = [-1, -1]
|
|
501
|
-
step_num = 0
|
|
502
|
-
tid = "Steps"
|
|
503
|
-
for timeline in timeline_list:
|
|
504
|
-
if 'GetNext' not in timeline[self._op_name_idx]:
|
|
505
|
-
continue
|
|
506
|
-
start_time = float(timeline[self._start_time_idx])
|
|
507
|
-
if step[0] == -1:
|
|
508
|
-
step[0] = start_time
|
|
509
|
-
else:
|
|
510
|
-
step[1] = start_time - step[0]
|
|
511
|
-
step_num = step_num + 1
|
|
512
|
-
step_list.append([str(step_num), tid, float(step[0]), step[1]])
|
|
513
|
-
step = [start_time, -1]
|
|
514
|
-
if step[0] != -1 and step[1] == -1:
|
|
515
|
-
step_num = step_num + 1
|
|
516
|
-
step_list.append([str(step_num), tid, float(step[0]),
|
|
517
|
-
float(timeline_list[-1][self._start_time_idx]) - step[0]])
|
|
518
|
-
return step_list
|
|
519
|
-
|
|
520
|
-
def _pynative_clock_synchronize(self, timeline_list):
|
|
521
|
-
"""Synchronize the timestamp from device to host."""
|
|
522
|
-
start_time_file_path = os.path.join(self._profiling_dir, f"start_time_{self._rank_id}.txt")
|
|
523
|
-
try:
|
|
524
|
-
with open(start_time_file_path) as f_obj:
|
|
525
|
-
lines = f_obj.readlines()
|
|
526
|
-
# lines[0] stores the host monotonic time of start training.
|
|
527
|
-
host_monotonic_start_time = int(lines[0].strip().split(':')[-1])
|
|
528
|
-
# lines[1] stores the gpu time of start training.
|
|
529
|
-
gpu_start_time = int(lines[1].strip().split(':')[-1])
|
|
530
|
-
except (IOError, OSError) as err:
|
|
531
|
-
logger.critical(f'Error occurred when read {start_time_file_path}: {err}')
|
|
532
|
-
raise ProfilerIOException()
|
|
533
|
-
time_diff = gpu_start_time * 1000 - host_monotonic_start_time
|
|
534
|
-
for idx, time_item in enumerate(timeline_list):
|
|
535
|
-
timeline_list[idx][self._start_time_idx] = int(time_item[self._start_time_idx]) + time_diff
|
|
536
|
-
timeline_list[idx][self._start_time_idx] = timeline_list[idx][self._start_time_idx] / 1000000
|
|
537
|
-
timeline_list[idx][self._duration_idx] = timeline_list[idx][self._duration_idx] / 1000
|
|
538
|
-
|
|
539
363
|
def _set_step_start_and_end_op_name(self, timeline_list):
|
|
540
364
|
"""Set the start and end operator full name of each step."""
|
|
541
365
|
if not timeline_list or len(timeline_list) < 2:
|
|
@@ -315,7 +315,7 @@ class BaseTimelineGenerator:
|
|
|
315
315
|
tid_name = timeline_dict['tid']
|
|
316
316
|
sort_index = 0
|
|
317
317
|
|
|
318
|
-
if tid_name in self._map_tid_name_to_int
|
|
318
|
+
if tid_name in self._map_tid_name_to_int:
|
|
319
319
|
sort_index, tid = self._map_tid_name_to_int.get(tid_name)
|
|
320
320
|
elif tid_name.startswith("Stream"):
|
|
321
321
|
tid = int(tid_name.split("#")[-1])
|
|
@@ -497,9 +497,9 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
497
497
|
)
|
|
498
498
|
if intersection_start < intersection_end:
|
|
499
499
|
intersection_segment_display_list.append(
|
|
500
|
-
[display_name, self._tid_dict
|
|
500
|
+
[display_name, self._tid_dict.get(display_name, ('',))[0],
|
|
501
501
|
intersection_start, (intersection_end - intersection_start) * factor_ns_to_us,
|
|
502
|
-
self._tid_dict
|
|
502
|
+
self._tid_dict.get(display_name, ('', ''))[1]]
|
|
503
503
|
)
|
|
504
504
|
if first_time_list[first_list_idx][self._duration_idx] >= \
|
|
505
505
|
second_time_list[second_list_idx][self._duration_idx]:
|
|
@@ -578,7 +578,7 @@ class GpuFrameWorkParser:
|
|
|
578
578
|
op_occurrences = int(op_detail[0])
|
|
579
579
|
op_total_time = float(op_detail[1])
|
|
580
580
|
op_avg_time = float(op_detail[2])
|
|
581
|
-
if op_shape in op_shape_dict
|
|
581
|
+
if op_shape in op_shape_dict:
|
|
582
582
|
# Classify according to the operator information of the same shape.
|
|
583
583
|
op_shape_dict.get(op_shape)[0] += op_occurrences
|
|
584
584
|
op_shape_dict.get(op_shape)[1] += op_total_time
|
|
@@ -76,6 +76,25 @@ class ProfilerInfo:
|
|
|
76
76
|
info["analyse_end_time"] = end_time
|
|
77
77
|
ProfilerInfo._profiler_info_dict.update(info)
|
|
78
78
|
|
|
79
|
+
@staticmethod
|
|
80
|
+
def set_export_start_time(start_time):
|
|
81
|
+
"""Set the export start time."""
|
|
82
|
+
info = dict()
|
|
83
|
+
info["export_start_time"] = start_time
|
|
84
|
+
ProfilerInfo._profiler_info_dict.update(info)
|
|
85
|
+
|
|
86
|
+
@staticmethod
|
|
87
|
+
def set_export_end_time(end_time):
|
|
88
|
+
"""Set the export end time."""
|
|
89
|
+
info = dict()
|
|
90
|
+
info["export_end_time"] = end_time
|
|
91
|
+
ProfilerInfo._profiler_info_dict.update(info)
|
|
92
|
+
|
|
93
|
+
@staticmethod
|
|
94
|
+
def set_export_flag(flag):
|
|
95
|
+
"""Set the graph id list."""
|
|
96
|
+
ProfilerInfo._profiler_info_dict["all_export"] = flag
|
|
97
|
+
|
|
79
98
|
@staticmethod
|
|
80
99
|
def set_graph_ids(graph_ids):
|
|
81
100
|
"""Set the graph id list."""
|