mindspore 2.2.10__cp37-cp37m-manylinux1_x86_64.whl → 2.2.14__cp37-cp37m-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +2 -1
- mindspore/_akg/akg/composite/build_module.py +95 -5
- mindspore/_akg/akg/topi/cpp/impl.py +1 -1
- mindspore/_akg/akg/tvm/_ffi/base.py +1 -1
- mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
- mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
- mindspore/_akg/akg/utils/util.py +18 -1
- mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_extends/parse/__init__.py +3 -2
- mindspore/_extends/parse/parser.py +6 -1
- mindspore/_extends/parse/standard_method.py +12 -2
- mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/_utils.py +16 -0
- mindspore/common/tensor.py +0 -2
- mindspore/communication/management.py +3 -0
- mindspore/context.py +34 -4
- mindspore/dataset/engine/cache_client.py +8 -5
- mindspore/dataset/engine/datasets.py +23 -0
- mindspore/dataset/engine/validators.py +1 -1
- mindspore/dataset/vision/py_transforms_util.py +2 -2
- mindspore/experimental/optim/lr_scheduler.py +5 -6
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +118 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/mindrecord/tools/cifar100_to_mr.py +49 -57
- mindspore/mindrecord/tools/cifar10_to_mr.py +46 -55
- mindspore/mindrecord/tools/csv_to_mr.py +3 -8
- mindspore/mindrecord/tools/mnist_to_mr.py +4 -9
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -4
- mindspore/nn/layer/activation.py +1 -1
- mindspore/nn/layer/embedding.py +2 -2
- mindspore/nn/layer/flash_attention.py +48 -135
- mindspore/nn/loss/loss.py +1 -1
- mindspore/nn/optim/ada_grad.py +2 -2
- mindspore/nn/optim/sgd.py +3 -2
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +6 -3
- mindspore/numpy/math_ops.py +1 -1
- mindspore/ops/__init__.py +3 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -31
- mindspore/ops/_grad_experimental/grad_comm_ops.py +4 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +8 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +37 -17
- mindspore/ops/_op_impl/aicpu/__init__.py +1 -0
- mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/function/array_func.py +6 -5
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/linalg_func.py +21 -11
- mindspore/ops/function/math_func.py +3 -0
- mindspore/ops/function/nn_func.py +13 -11
- mindspore/ops/function/parameter_func.py +2 -0
- mindspore/ops/function/sparse_unary_func.py +2 -2
- mindspore/ops/function/vmap_func.py +1 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_embedding_cache_ops.py +1 -1
- mindspore/ops/operations/_grad_ops.py +3 -4
- mindspore/ops/operations/_inner_ops.py +56 -1
- mindspore/ops/operations/_quant_ops.py +4 -4
- mindspore/ops/operations/_rl_inner_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +15 -4
- mindspore/ops/operations/custom_ops.py +1 -1
- mindspore/ops/operations/debug_ops.py +1 -1
- mindspore/ops/operations/image_ops.py +3 -3
- mindspore/ops/operations/inner_ops.py +49 -0
- mindspore/ops/operations/math_ops.py +65 -3
- mindspore/ops/operations/nn_ops.py +95 -28
- mindspore/ops/operations/random_ops.py +2 -0
- mindspore/ops/operations/sparse_ops.py +4 -4
- mindspore/ops/silent_check.py +162 -0
- mindspore/parallel/__init__.py +3 -2
- mindspore/parallel/_auto_parallel_context.py +82 -3
- mindspore/parallel/_parallel_serialization.py +34 -2
- mindspore/parallel/_tensor.py +3 -1
- mindspore/parallel/_transformer/transformer.py +8 -8
- mindspore/parallel/checkpoint_transform.py +191 -45
- mindspore/profiler/parser/ascend_cluster_generator.py +111 -0
- mindspore/profiler/parser/ascend_communicate_generator.py +315 -0
- mindspore/profiler/parser/ascend_flops_generator.py +8 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +8 -2
- mindspore/profiler/parser/ascend_hccl_generator.py +2 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +30 -6
- mindspore/profiler/parser/ascend_msprof_generator.py +16 -5
- mindspore/profiler/parser/ascend_op_generator.py +15 -7
- mindspore/profiler/parser/ascend_timeline_generator.py +5 -2
- mindspore/profiler/parser/base_timeline_generator.py +11 -3
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -1
- mindspore/profiler/parser/framework_parser.py +8 -2
- mindspore/profiler/parser/memory_usage_parser.py +8 -2
- mindspore/profiler/parser/minddata_analyzer.py +8 -2
- mindspore/profiler/parser/minddata_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_analyzer.py +4 -2
- mindspore/profiler/parser/msadvisor_parser.py +9 -3
- mindspore/profiler/profiling.py +97 -25
- mindspore/rewrite/api/node.py +1 -1
- mindspore/rewrite/api/symbol_tree.py +2 -2
- mindspore/rewrite/parsers/for_parser.py +6 -6
- mindspore/rewrite/parsers/module_parser.py +4 -4
- mindspore/scipy/ops.py +55 -5
- mindspore/scipy/optimize/__init__.py +3 -2
- mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
- mindspore/train/callback/_checkpoint.py +8 -8
- mindspore/train/callback/_landscape.py +2 -3
- mindspore/train/callback/_summary_collector.py +6 -7
- mindspore/train/dataset_helper.py +6 -0
- mindspore/train/model.py +17 -5
- mindspore/train/serialization.py +6 -1
- mindspore/train/summary/_writer_pool.py +1 -1
- mindspore/train/summary/summary_record.py +5 -6
- mindspore/version.py +1 -1
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/METADATA +3 -2
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/RECORD +150 -158
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
- mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/WHEEL +0 -0
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# Copyright 2024 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""communicate data analyze api file"""
|
|
16
|
+
import json
|
|
17
|
+
import re
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
import stat
|
|
21
|
+
from collections import defaultdict
|
|
22
|
+
|
|
23
|
+
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AscendCommunicationGenerator:
    """
    Load msprof communication analysis results and split them by step.

    The generator reads ``communication.json`` and ``communication_matrix.json``
    from ``source_path``, groups the communication operators by training step,
    separates point-to-point (send/receive) operators from collective ones,
    adds aggregated "Total Op Info" entries and finally writes both results
    as JSON files.
    """
    COMMUNICATION_TIME_INFO = "Communication Time Info"
    START_TIMESTAMP = "Start Timestamp(us)"
    COMMUNICATION_BANDWIDTH_INFO = "Communication Bandwidth Info"
    HCOM_SEND = "Send"
    HCOM_RECEIVE = "Receive"
    TOTAL = "Total"
    SYNCHRONIZATION_TIME_RATIO = "Synchronization Time Ratio"
    SYNCHRONIZATION_TIME_MS = "Synchronization Time(ms)"
    WAIT_TIME_RATIO = "Wait Time Ratio"
    TRANSIT_TIME_MS = "Transit Time(ms)"
    TRANSIT_SIZE_MB = "Transit Size(MB)"
    SIZE_DISTRIBUTION = "Size Distribution"
    WAIT_TIME_MS = "Wait Time(ms)"
    BANDWIDTH_GB_S = "Bandwidth(GB/s)"
    COMMUNICATION = "communication.json"
    COMMUNICATION_MATRIX = "communication_matrix.json"
    P2P = "p2p"
    COLLECTIVE = "collective"
    TRANSPORT_TYPE = "Transport Type"
    PATTERN1 = re.compile(r"receive|send")
    # Regex alternation takes the first alternative that matches at a given
    # position, so longer names must precede their prefixes; otherwise
    # "reducescatter" is reported as "reduce" and "alltoallv" as "alltoall".
    PATTERN2 = re.compile(r"invalid|broadcast|allreduce|allgather|reducescatter|reduce|"
                          r"scatter|alltoallvc|alltoallv|alltoall")

    def __init__(self, source_path):
        """
        Args:
            source_path (str): Directory that contains the msprof analysis output.
        """
        super().__init__()
        self.root_path = source_path
        # A single catch-all step covering every timestamp. Nothing in this class
        # adds further steps; presumably callers may replace the list - TODO confirm.
        self.step_list = [{"step_id": None, "start_ts": 0, "end_ts": float('inf'), "comm_ops": {}}]
        self.output_communication = {}
        self.output_matrix_data = {}

    @staticmethod
    def combine_size_distribution(op_dict: dict, total_dict: dict):
        """
        Accumulate the size distribution of one operator into ``total_dict``.

        Args:
            op_dict (dict): Maps a size key to a two-item sequence.
            total_dict (dict): Accumulator with the same layout, updated in place.
        """
        for size, size_info in op_dict.items():
            # setdefault keeps this working for plain dict accumulators as well
            # as for the defaultdict created in combine_bandwidth_info.
            total_entry = total_dict.setdefault(size, [0, 0])
            total_entry[0] += size_info[0]
            total_entry[1] += size_info[1]

    @staticmethod
    def compute_ratio(dividend: float, divisor: float):
        """Return ``dividend / divisor`` rounded to 4 digits, or 0 when the divisor is (almost) zero."""
        if abs(divisor) < 1e-15:
            return 0
        return round(dividend / divisor, 4)

    @staticmethod
    def _step_name(step_info: dict) -> str:
        """Return the output key of a step: "step<id>" when a step id is set, otherwise "step"."""
        step_id = step_info.get("step_id")
        # Formatting (instead of "step" + id) also accepts integer step ids.
        return "step{}".format(step_id) if step_id else "step"

    @staticmethod
    def _dump_json(file_path, payload, error_message):
        """
        Write ``payload`` as JSON to ``file_path`` and restrict it to owner read/write.

        Raises:
            ProfilerIOException: If the file cannot be opened or written.
        """
        try:
            with os.fdopen(os.open(file_path,
                                   os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
                json.dump(payload, json_file)
        except (IOError, OSError) as err:
            logging.critical(error_message, err)
            raise ProfilerIOException() from err
        # The mode passed to os.open only applies to newly created files, so
        # the permissions are enforced again for files that already existed.
        if os.path.exists(file_path):
            os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)

    def parse(self) -> None:
        """Parse both the communication file and the communication matrix file."""
        self.generate_communication()
        self.generate_matrix()

    def generate_communication(self):
        """
        Load communication.json and fill ``output_communication`` with one entry per step.
        """
        communication_file = os.path.join(self.root_path, self.COMMUNICATION)
        with open(communication_file) as file:
            communication_data = json.load(file)
        if not communication_data:
            return
        self.split_comm_op_by_step(communication_data)

        for step_info in self.step_list:
            step = self._step_name(step_info)
            self.output_communication[step] = self.get_communication_ops_dict(step_info.get("comm_ops"))

    def generate_matrix(self):
        """Load communication_matrix.json and fill ``output_matrix_data`` with one entry per step."""
        communication_file = os.path.join(self.root_path, self.COMMUNICATION_MATRIX)
        with open(communication_file) as file:
            matrix_data = json.load(file)
        if not matrix_data:
            return
        matrix_data_by_step = self.split_matrix_by_step(matrix_data)

        for step, comm_matrix_data in matrix_data_by_step.items():
            self.output_matrix_data[step] = self.get_matrix_ops_dict(comm_matrix_data)

    def split_comm_op_by_step(self, communication_data: dict):
        """
        Assign every communication operator to the step whose time range contains its start timestamp.

        Args:
            communication_data (dict): Maps operator name to its communication info.
        """
        if len(self.step_list) == 1:
            # With a single step every operator belongs to it; no timestamp
            # comparison is needed (and none is attempted on missing timestamps).
            self.step_list[0]["comm_ops"] = communication_data
            return
        for communication_op, communication_op_info in communication_data.items():
            start_time = communication_op_info.get(self.COMMUNICATION_TIME_INFO, {}).get(self.START_TIMESTAMP)
            if start_time is None:
                # Comparing None with the step bounds would raise TypeError.
                logging.warning("Communication operator %s has no start timestamp, skip it.", communication_op)
                continue
            for step_info in self.step_list:
                if step_info.get("start_ts", -1) <= start_time <= step_info.get("end_ts", -1):
                    # setdefault stores the operator even if the step has no "comm_ops" entry yet.
                    step_info.setdefault("comm_ops", {})[communication_op] = communication_op_info
                    break

    def split_communication_p2p_ops(self, op_data: dict):
        """
        Split operators into point-to-point and collective groups.

        Operators whose name contains "Send" or "Receive" are point-to-point;
        operators whose name starts with "Total" are dropped; the rest are collective.

        Returns:
            dict, ``{"p2p": {...}, "collective": {...}}``.
        """
        comm_op_dict = {self.P2P: {}, self.COLLECTIVE: {}}
        for communication_op, communication_info in op_data.items():
            if self.HCOM_SEND in communication_op or self.HCOM_RECEIVE in communication_op:
                comm_op_dict[self.P2P][communication_op] = communication_info
            elif communication_op.startswith(self.TOTAL):
                continue
            else:
                comm_op_dict[self.COLLECTIVE][communication_op] = communication_info
        return comm_op_dict

    def split_matrix_by_step(self, matrix_data: dict) -> dict:
        """
        Group matrix entries by step, using the operators already assigned to each step.

        Returns:
            dict, maps the step name to the matrix entries of that step. When no
            step holds any operator, all entries are returned under "step".
        """
        matrix_data_by_step = {}
        if self.is_step_list_empty():
            matrix_data_by_step["step"] = matrix_data
            return matrix_data_by_step

        for comm_op in matrix_data:
            for step_info in self.step_list:
                if comm_op in step_info.get("comm_ops", {}):
                    step = self._step_name(step_info)
                    matrix_data_by_step.setdefault(step, {})[comm_op] = matrix_data.get(comm_op)
                    break
        return matrix_data_by_step

    def get_communication_ops_dict(self, op_data: dict) -> dict:
        """Split operators into p2p/collective groups and add a "Total Op Info" entry to each non-empty group."""
        comm_op_dict = self.split_communication_p2p_ops(op_data)
        self.compute_total_info(comm_op_dict[self.P2P])
        self.compute_total_info(comm_op_dict[self.COLLECTIVE])
        return comm_op_dict

    def integrate_matrix_data(self, comm_op_dict_simple):
        """
        Summarize the link samples of every (op type, group, link) key.

        For each key the samples are sorted by bandwidth (descending, in place) and
        the top, middle and three lowest samples plus a total are reported.

        Args:
            comm_op_dict_simple (dict): Maps ``(op_type, group, link)`` to a list of link samples.

        Returns:
            dict, maps "<op_type>-<rank>@<group>" to ``{link: sample}``.
        """
        comm_op_dict = defaultdict(dict)
        for new_comm_op_name, data in comm_op_dict_simple.items():
            if not data:
                continue
            # A sample without a bandwidth entry is ordered as 0 instead of raising KeyError.
            data.sort(key=lambda x: x.get(self.BANDWIDTH_GB_S, 0), reverse=True)
            t_type = data[0].get(self.TRANSPORT_TYPE, '')
            t_size = sum(x.get(self.TRANSIT_SIZE_MB, 0) for x in data)
            t_time = sum(x.get(self.TRANSIT_TIME_MS, 0) for x in data)
            bandwidth = self.compute_ratio(t_size, t_time)

            op_type, group, link = new_comm_op_name[0], new_comm_op_name[1], new_comm_op_name[2]
            # With fewer than three samples the lowest ranks reuse the slowest
            # available sample instead of indexing out of range.
            index2 = -min(2, len(data))
            index3 = -min(3, len(data))

            comm_op_dict[f'{op_type}-top1@{group}'].update({link: data[0]})
            comm_op_dict[f'{op_type}-middle@{group}'].update({link: data[len(data) // 2]})
            comm_op_dict[f'{op_type}-bottom1@{group}'].update({link: data[-1]})
            comm_op_dict[f'{op_type}-bottom2@{group}'].update({link: data[index2]})
            comm_op_dict[f'{op_type}-bottom3@{group}'].update({link: data[index3]})
            comm_op_dict[f'{op_type}-total@{group}'].update({link: {
                self.TRANSPORT_TYPE: t_type,
                self.TRANSIT_SIZE_MB: t_size,
                self.TRANSIT_TIME_MS: t_time,
                self.BANDWIDTH_GB_S: bandwidth
            }})
        return comm_op_dict

    def get_matrix_ops_dict(self, op_data: dict) -> dict:
        """
        Parse the matrix data of one step.

        Every link sample gets an "op_name" entry (the operator name before "@")
        and is grouped by ``(op_type, text after the last "@", link)``.

        Returns:
            dict, ``{"p2p": ..., "collective": ...}`` as produced by integrate_matrix_data.
        """
        comm_op_dict_simple_p2p = defaultdict(list)
        comm_op_dict_simple_collective = defaultdict(list)

        for communication_op, communication_info in op_data.items():
            lowered_op = communication_op.lower()
            if self.HCOM_SEND in communication_op or self.HCOM_RECEIVE in communication_op:
                # The name contains "Send" or "Receive", so PATTERN1 always matches here.
                comm_op_type = self.PATTERN1.search(lowered_op).group()
                target = comm_op_dict_simple_p2p
            elif communication_op.startswith(self.TOTAL):
                continue
            else:
                match_obj = self.PATTERN2.search(lowered_op)
                if match_obj:
                    comm_op_type = match_obj.group()
                else:
                    comm_op_type = lowered_op.split('/')[-1].split('-op')[0]
                    logging.warning("Communication operator type not found communication_op: %s, use comm_op_type: %s",
                                    communication_op, comm_op_type)
                target = comm_op_dict_simple_collective

            name_parts = communication_op.split("@")
            for link, data in communication_info.items():
                data['op_name'] = name_parts[0]
                target[(comm_op_type, name_parts[-1], link)].append(data)

        comm_op_dict = {self.P2P: self.integrate_matrix_data(comm_op_dict_simple_p2p),
                        self.COLLECTIVE: self.integrate_matrix_data(comm_op_dict_simple_collective)}

        return comm_op_dict

    def is_step_list_empty(self):
        """Return True when no step holds any communication operator."""
        return not any(step_info.get("comm_ops") for step_info in self.step_list)

    def compute_total_info(self, comm_ops: dict):
        """
        Add a "Total Op Info" entry that aggregates time and bandwidth info of all operators.

        Args:
            comm_ops (dict): Maps operator name to its info; updated in place.
                Nothing is added when it is empty.
        """
        if not comm_ops:
            return
        total_time_info_dict = defaultdict(float)
        total_bandwidth_info_dict = {}
        for communication_op_info in comm_ops.values():
            for com_info, com_info_dict in communication_op_info.items():
                if com_info == self.COMMUNICATION_TIME_INFO:
                    self.combine_time_info(com_info_dict, total_time_info_dict)
                elif com_info == self.COMMUNICATION_BANDWIDTH_INFO:
                    self.combine_bandwidth_info(com_info_dict, total_bandwidth_info_dict)
        self.compute_time_ratio(total_time_info_dict)
        self.compute_bandwidth_ratio(total_bandwidth_info_dict)
        comm_ops['Total Op Info'] = {
            self.COMMUNICATION_TIME_INFO: total_time_info_dict,
            self.COMMUNICATION_BANDWIDTH_INFO: total_bandwidth_info_dict
        }

    def combine_time_info(self, com_info_dict: dict, total_time_info_dict: dict):
        """Add every time value of one operator to the totals; ratios and the start timestamp are not summed."""
        skipped = (self.WAIT_TIME_RATIO, self.SYNCHRONIZATION_TIME_RATIO, self.START_TIMESTAMP)
        for time_info, value in com_info_dict.items():
            if time_info in skipped:
                continue
            total_time_info_dict[time_info] = total_time_info_dict.get(time_info, 0) + value

    def combine_bandwidth_info(self, com_info_dict: dict, total_bandwidth_info_dict: dict):
        """
        Add the bandwidth info of one operator to the per-transport-type totals.

        Transit time and transit size are summed; size distributions are merged
        entry by entry. Other fields are ignored.
        """
        add_list = [self.TRANSIT_TIME_MS, self.TRANSIT_SIZE_MB]
        dict_list = [self.SIZE_DISTRIBUTION]
        for transport_type, part_transport_dict in com_info_dict.items():
            if transport_type not in total_bandwidth_info_dict:
                total_bandwidth_info_dict[transport_type] = {
                    self.TRANSIT_TIME_MS: 0,
                    self.TRANSIT_SIZE_MB: 0,
                    self.SIZE_DISTRIBUTION: defaultdict(lambda: [0, 0])
                }
            transport_total = total_bandwidth_info_dict[transport_type]
            for bandwidth_msg, value in part_transport_dict.items():
                if bandwidth_msg in add_list:
                    transport_total[bandwidth_msg] += value
                if bandwidth_msg in dict_list:
                    self.combine_size_distribution(value, transport_total[bandwidth_msg])

    def compute_time_ratio(self, total_time_info_dict: dict):
        """Store the wait-time ratio and the synchronization-time ratio of the summed times."""
        wait_time = total_time_info_dict.get(self.WAIT_TIME_MS, 0)
        transit_time = total_time_info_dict.get(self.TRANSIT_TIME_MS, 0)
        sync_time = total_time_info_dict.get(self.SYNCHRONIZATION_TIME_MS, 0)
        total_time_info_dict[self.WAIT_TIME_RATIO] = self.compute_ratio(wait_time, wait_time + transit_time)
        total_time_info_dict[self.SYNCHRONIZATION_TIME_RATIO] = \
            self.compute_ratio(sync_time, transit_time + sync_time)

    def compute_bandwidth_ratio(self, total_bandwidth_info_dict: dict):
        """Store the bandwidth (transit size / transit time) of every transport type in its total entry."""
        for bandwidth_dict in total_bandwidth_info_dict.values():
            # The ratio was previously computed and thrown away; record it.
            bandwidth_dict[self.BANDWIDTH_GB_S] = self.compute_ratio(
                bandwidth_dict.get(self.TRANSIT_SIZE_MB, 0), bandwidth_dict.get(self.TRANSIT_TIME_MS, 0))

    def write(self, communication_file_path, communication_matrix_file_path):
        """
        Write the communication result and the communication matrix result as JSON files.

        Args:
            communication_file_path (str): Target path of the communication result.
            communication_matrix_file_path (str): Target path of the matrix result.

        Raises:
            ProfilerIOException: If either file cannot be written.
        """
        self._dump_json(communication_file_path, self.output_communication,
                        'Error occurred when write communication file: %s')
        self._dump_json(communication_matrix_file_path, self.output_matrix_data,
                        'Error occurred when write communication matrix file: %s')
|
|
@@ -26,13 +26,19 @@ from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException
|
|
|
26
26
|
class AscendFlopsGenerator:
|
|
27
27
|
"""Generate ascend flops data from DataFrame."""
|
|
28
28
|
|
|
29
|
-
def __init__(self, op_summary):
|
|
29
|
+
def __init__(self, op_summary, pretty=False):
|
|
30
30
|
self.op_summary = op_summary
|
|
31
31
|
self.flops_dt = np.dtype(
|
|
32
32
|
[('op_full_name', object), ('MFLOPs(10^6 cube)', float), ('GFLOPS(10^9 cube)', float),
|
|
33
33
|
('MFLOPs(10^6 vector)', float), ('GFLOPS(10^9 vector)', float)])
|
|
34
34
|
self.flops = None
|
|
35
35
|
self.flops_summary = None
|
|
36
|
+
self.pretty = pretty
|
|
37
|
+
|
|
38
|
+
@property
|
|
39
|
+
def indent(self):
|
|
40
|
+
indent = 1 if self.pretty else None
|
|
41
|
+
return indent
|
|
36
42
|
|
|
37
43
|
def parse(self):
|
|
38
44
|
"""Analyse the op_summary data generate flops data."""
|
|
@@ -86,7 +92,7 @@ class AscendFlopsGenerator:
|
|
|
86
92
|
with os.fdopen(os.open(flops_summary_path,
|
|
87
93
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR),
|
|
88
94
|
'w') as json_file:
|
|
89
|
-
json.dump(self.flops_summary, json_file)
|
|
95
|
+
json.dump(self.flops_summary, json_file, indent=self.indent)
|
|
90
96
|
except (IOError, OSError) as err:
|
|
91
97
|
logging.critical('Errot occurred when write step trace point info file: %s', err)
|
|
92
98
|
raise ProfilerIOException() from err
|
|
@@ -26,10 +26,16 @@ from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException
|
|
|
26
26
|
class AscendFPBPGenerator:
|
|
27
27
|
"""Generate ascend fp bp data from DataFrame."""
|
|
28
28
|
|
|
29
|
-
def __init__(self, op_summary, steptrace):
|
|
29
|
+
def __init__(self, op_summary, steptrace, pretty=False):
|
|
30
30
|
self.op_summary = op_summary
|
|
31
31
|
self.steptrace = steptrace
|
|
32
32
|
self.points = None
|
|
33
|
+
self.pretty = pretty
|
|
34
|
+
|
|
35
|
+
@property
|
|
36
|
+
def indent(self):
|
|
37
|
+
indent = 1 if self.pretty else None
|
|
38
|
+
return indent
|
|
33
39
|
|
|
34
40
|
def parse(self):
|
|
35
41
|
"""Analyse the op_summary and steptrace data generate fpbp data."""
|
|
@@ -68,7 +74,7 @@ class AscendFPBPGenerator:
|
|
|
68
74
|
with os.fdopen(os.open(step_trace_point_info_path,
|
|
69
75
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR),
|
|
70
76
|
'w') as json_file:
|
|
71
|
-
json.dump(self.points, json_file)
|
|
77
|
+
json.dump(self.points, json_file, indent=self.indent)
|
|
72
78
|
except (IOError, OSError) as err:
|
|
73
79
|
logging.critical('Errot occurred when write step trace point info file: %s', err)
|
|
74
80
|
raise ProfilerIOException() from err
|
|
@@ -208,8 +208,8 @@ class AscendHCCLGenerator:
|
|
|
208
208
|
name = row.get('name')
|
|
209
209
|
pid = row.get('pid')
|
|
210
210
|
tid = row.get('tid')
|
|
211
|
-
ts = row.get('ts')
|
|
212
|
-
dur = row.get('dur')
|
|
211
|
+
ts = float(row.get('ts'))
|
|
212
|
+
dur = float(row.get('dur'))
|
|
213
213
|
te = ts + dur
|
|
214
214
|
ph = row.get('ph')
|
|
215
215
|
task_type = row.get('args', {}).get('task type', '')
|
|
@@ -60,14 +60,20 @@ class AscendMsprofExporter:
|
|
|
60
60
|
|
|
61
61
|
def get_drv_version(self):
|
|
62
62
|
"""Get the drv_version for choosing the export mode."""
|
|
63
|
+
script_path = self._get_msprof_info_path()
|
|
64
|
+
if not script_path:
|
|
65
|
+
logger.warning("Can`t find get_msprof_info.py path, use single-export mode instead.")
|
|
66
|
+
return False
|
|
67
|
+
|
|
68
|
+
logger.info("get_msprof_info.py path is : %s", script_path)
|
|
63
69
|
host_dir = os.path.join(self.prof_root_dir, 'host')
|
|
64
70
|
cmd = ['python',
|
|
65
|
-
|
|
71
|
+
script_path,
|
|
66
72
|
'-dir', host_dir]
|
|
67
73
|
try:
|
|
68
74
|
outs, _ = self._run_cmd(cmd)
|
|
69
75
|
if not outs:
|
|
70
|
-
logger.warning('Check the drvVersion can`t find the result, use single
|
|
76
|
+
logger.warning('Check the drvVersion can`t find the result, use single-export mode instead.')
|
|
71
77
|
return False
|
|
72
78
|
result = json.loads(outs)
|
|
73
79
|
logger.info('get drv_version result is : %s', result)
|
|
@@ -104,6 +110,10 @@ class AscendMsprofExporter:
|
|
|
104
110
|
self._run_cmd(msprof_export_cmd)
|
|
105
111
|
self._check_export_files(self.source_path)
|
|
106
112
|
|
|
113
|
+
msprof_analyze_cmd = [self._msprof_cmd, "--analyze=on", "--rule=communication,communication_matrix",
|
|
114
|
+
"--output={}".format(self.prof_root_dir)]
|
|
115
|
+
self._run_cmd(msprof_analyze_cmd)
|
|
116
|
+
|
|
107
117
|
return flag
|
|
108
118
|
|
|
109
119
|
def _run_cmd(self, cmd):
|
|
@@ -176,6 +186,18 @@ class AscendMsprofExporter:
|
|
|
176
186
|
|
|
177
187
|
logger.info("The msprof command has been added to the path!")
|
|
178
188
|
|
|
189
|
+
def _get_msprof_info_path(self):
    """
    Locate the get_msprof_info.py script that belongs to the msprof command.

    The msprof executable is resolved with ``which``; the script is then looked
    up as ``tools/profiler/profiler_tool/analysis/interface/get_msprof_info.py``
    below each parent directory of the resolved executable, nearest first.

    Returns:
        str, the script path, or "" when msprof or the script cannot be found.
    """
    outs, _ = self._run_cmd(['which', self._msprof_cmd])
    if not outs:
        return ""
    msprof_path = os.path.realpath(outs.strip())
    relative_script = 'tools/profiler/profiler_tool/analysis/interface/get_msprof_info.py'

    # Walk up the real directory hierarchy instead of cutting the path at the
    # first "tools" substring: a component such as "devtools" earlier in the
    # path would otherwise produce a wrong prefix and hide an existing script.
    current_dir = os.path.dirname(msprof_path)
    while True:
        script_path = os.path.join(current_dir, relative_script)
        if os.path.exists(script_path):
            return script_path
        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            break
        current_dir = parent_dir

    # Fall back to the previous substring-based lookup so that every location
    # accepted before is still accepted.
    script_path = os.path.join(msprof_path.split('tools')[0], relative_script)
    if os.path.exists(script_path):
        return script_path
    return ""
|
|
200
|
+
|
|
179
201
|
def _generate_step_trace(self, prof_path, device_path):
|
|
180
202
|
""""generate model_id iteration_id dict"""
|
|
181
203
|
|
|
@@ -228,9 +250,10 @@ class AscendMsprofExporter:
|
|
|
228
250
|
op_statistic.add(summary_file)
|
|
229
251
|
|
|
230
252
|
if not op_summary:
|
|
231
|
-
|
|
253
|
+
logger.warning("The op_summary file was not found, perhaps the original data was not collected.")
|
|
254
|
+
return
|
|
232
255
|
if not op_statistic:
|
|
233
|
-
|
|
256
|
+
logger.warning("The op_statistics file was not found, perhaps the original data was not collected.")
|
|
234
257
|
|
|
235
258
|
logger.info("Finish checking files.")
|
|
236
259
|
|
|
@@ -250,7 +273,8 @@ class AscendMsprofExporter:
|
|
|
250
273
|
op_statistic.add(summary_file)
|
|
251
274
|
|
|
252
275
|
if not op_summary:
|
|
253
|
-
|
|
276
|
+
logger.warning("The op_summary file was not found, perhaps the original data was not collected.")
|
|
277
|
+
return
|
|
254
278
|
if not op_statistic:
|
|
255
|
-
|
|
279
|
+
logger.warning("The op_statistics file was not found, perhaps the original data was not collected.")
|
|
256
280
|
logger.info("Finish checking files.")
|
|
@@ -18,6 +18,7 @@ import fnmatch
|
|
|
18
18
|
import os
|
|
19
19
|
|
|
20
20
|
import numpy as np
|
|
21
|
+
from mindspore import log as logger
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class AscendMsprofDataGeneratorOld:
|
|
@@ -128,13 +129,22 @@ class AscendMsprofDataGeneratorOld:
|
|
|
128
129
|
self.op_summary_name = self.op_summary_basis_name
|
|
129
130
|
self.op_summary_name['Iteration ID'] = {'index': -1, 'dtype': ('Iteration ID', object)}
|
|
130
131
|
for row in reader:
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
132
|
+
try:
|
|
133
|
+
row = [row[index.get('index')] for index in self.op_summary_name.values()]
|
|
134
|
+
row[self.op_summary_name['Iteration ID']['index']] = iteration
|
|
135
|
+
row = ['0' if i == 'N/A' else i for i in row]
|
|
136
|
+
row += ['0.000'] # Add one column for Task Start Time(us)
|
|
137
|
+
op_summary.append(tuple(row))
|
|
138
|
+
except IndexError:
|
|
139
|
+
logger.warning(f"Fail to read{file}. Will ignore this file and continue reading")
|
|
135
140
|
|
|
136
141
|
op_summary_dt = np.dtype([value['dtype'] for value in self.op_summary_name.values()])
|
|
137
142
|
|
|
143
|
+
for i in range(0, len(op_summary)):
|
|
144
|
+
if len(op_summary[i]) < len(op_summary_dt):
|
|
145
|
+
new_raw = [j for j in op_summary[i]]
|
|
146
|
+
new_raw.extend([0 for _ in range(len(op_summary_dt) - len(op_summary[i]))])
|
|
147
|
+
op_summary[i] = tuple(new_raw)
|
|
138
148
|
self.op_summary = np.array(op_summary, dtype=op_summary_dt)
|
|
139
149
|
self.op_summary['Task Start Time'] = self.op_summary['Task Start Time'] * 1e-3
|
|
140
150
|
self.op_summary['Task Duration'] = self.op_summary['Task Duration'] * 1e-3
|
|
@@ -348,7 +358,8 @@ class AscendMsprofDataGenerator:
|
|
|
348
358
|
new_row = tuple(['0' if d == 'N/A' else d for d in new_row])
|
|
349
359
|
op_statistic.append(new_row)
|
|
350
360
|
break
|
|
351
|
-
|
|
361
|
+
if not op_statistic:
|
|
362
|
+
return
|
|
352
363
|
op_statistic_dt = np.dtype(self.op_statistic_type)
|
|
353
364
|
self.op_statistic = np.array(op_statistic, dtype=op_statistic_dt)
|
|
354
365
|
self.op_statistic['Total Time'] *= 1e-3
|
|
@@ -35,6 +35,7 @@ class AscendOPGenerator:
|
|
|
35
35
|
self.aicpu_detail = None
|
|
36
36
|
self.framework_raw = None
|
|
37
37
|
self.output_timeline_data = None
|
|
38
|
+
self.has_statistic_file = True
|
|
38
39
|
|
|
39
40
|
self.op_detail_dt = np.dtype(
|
|
40
41
|
[('full_op_name', object), ('task_duration', float), ('execution_frequency', int), ('task_type', object)])
|
|
@@ -61,12 +62,16 @@ class AscendOPGenerator:
|
|
|
61
62
|
|
|
62
63
|
# aicore intermediation type
|
|
63
64
|
self.op_type = self._parse_op_type(self.op_statistic)
|
|
65
|
+
if isinstance(self.op_type, np.ndarray) and not self.op_type.size or not isinstance(self.op_type, np.ndarray) \
|
|
66
|
+
and not self.op_type:
|
|
67
|
+
self.has_statistic_file = False
|
|
64
68
|
|
|
65
69
|
# aicpu_intermediation
|
|
66
70
|
self.aicpu_detail = self._parse_aicpu_detail(self.op_summary)
|
|
67
71
|
|
|
68
72
|
# framwork_raw
|
|
69
|
-
|
|
73
|
+
if self.has_statistic_file:
|
|
74
|
+
self.framework_raw = self._parse_framework_raw(self.op_summary)
|
|
70
75
|
|
|
71
76
|
self.output_timeline_data = self.op_summary[self.op_summary['Task Type'] == 'AI_CORE'][
|
|
72
77
|
['Op Name', 'Stream ID', 'Task Start Time', 'Task Duration']]
|
|
@@ -84,7 +89,7 @@ class AscendOPGenerator:
|
|
|
84
89
|
output_timeline_data_path : output_timeline_data.txt path
|
|
85
90
|
"""
|
|
86
91
|
# aicore intermediation detail
|
|
87
|
-
if self.op_detail.shape[0] != 0:
|
|
92
|
+
if isinstance(self.op_detail, np.ndarray) and self.op_detail.size and self.op_detail.shape[0] != 0:
|
|
88
93
|
try:
|
|
89
94
|
with os.fdopen(os.open(aicore_intermediate_detail_path,
|
|
90
95
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR),
|
|
@@ -99,7 +104,7 @@ class AscendOPGenerator:
|
|
|
99
104
|
os.chmod(aicore_intermediate_detail_path, stat.S_IREAD | stat.S_IWRITE)
|
|
100
105
|
|
|
101
106
|
# aicore intermediation type
|
|
102
|
-
if self.op_type.shape[0] != 0:
|
|
107
|
+
if isinstance(self.op_type, np.ndarray) and self.op_type.size and self.op_type.shape[0] != 0:
|
|
103
108
|
try:
|
|
104
109
|
with os.fdopen(os.open(aicore_intermediate_type_path,
|
|
105
110
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR),
|
|
@@ -114,7 +119,7 @@ class AscendOPGenerator:
|
|
|
114
119
|
os.chmod(aicore_intermediate_type_path, stat.S_IREAD | stat.S_IWRITE)
|
|
115
120
|
|
|
116
121
|
# aicpu_intermediation
|
|
117
|
-
if self.aicpu_detail.shape[0] != 0:
|
|
122
|
+
if isinstance(self.aicpu_detail, np.ndarray) and self.aicpu_detail.size and self.aicpu_detail.shape[0] != 0:
|
|
118
123
|
try:
|
|
119
124
|
with os.fdopen(os.open(aicpu_intermediate_detail_path,
|
|
120
125
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR),
|
|
@@ -129,7 +134,7 @@ class AscendOPGenerator:
|
|
|
129
134
|
os.chmod(aicpu_intermediate_detail_path, stat.S_IREAD | stat.S_IWRITE)
|
|
130
135
|
|
|
131
136
|
# framwork_raw
|
|
132
|
-
if self.framework_raw.shape[0] != 0:
|
|
137
|
+
if isinstance(self.framework_raw, np.ndarray) and self.framework_raw.size and self.framework_raw.shape[0] != 0:
|
|
133
138
|
try:
|
|
134
139
|
with os.fdopen(os.open(framework_raw_path,
|
|
135
140
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR),
|
|
@@ -144,7 +149,8 @@ class AscendOPGenerator:
|
|
|
144
149
|
os.chmod(framework_raw_path, stat.S_IREAD | stat.S_IWRITE)
|
|
145
150
|
|
|
146
151
|
# output_timeline_data
|
|
147
|
-
if self.output_timeline_data.
|
|
152
|
+
if isinstance(self.output_timeline_data, np.ndarray) and self.output_timeline_data.size and \
|
|
153
|
+
self.output_timeline_data.shape[0] != 0 and output_timeline_data_path:
|
|
148
154
|
try:
|
|
149
155
|
with os.fdopen(os.open(output_timeline_data_path,
|
|
150
156
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR),
|
|
@@ -186,7 +192,9 @@ class AscendOPGenerator:
|
|
|
186
192
|
Args:
|
|
187
193
|
op_statistic(DataFrame): op statistic data.
|
|
188
194
|
"""
|
|
189
|
-
|
|
195
|
+
if isinstance(op_statistic, np.ndarray) and not op_statistic.size or not isinstance(op_statistic, np.ndarray) \
|
|
196
|
+
and not op_statistic:
|
|
197
|
+
return None
|
|
190
198
|
groups, _, inverse, _ = np.unique(op_statistic['Op Type'], return_index=True, return_inverse=True,
|
|
191
199
|
return_counts=True)
|
|
192
200
|
|
|
@@ -49,17 +49,20 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
49
49
|
[('Op Name', object), ('Stream ID', int), ('Task Start Time', float), ('Task Duration', float),
|
|
50
50
|
('pid', int)])
|
|
51
51
|
|
|
52
|
-
def init_timeline(self, op_summary, steptrace):
|
|
52
|
+
def init_timeline(self, op_summary, steptrace, pretty=False):
|
|
53
53
|
"""
|
|
54
54
|
Init timeline metadata, adding all collected info.
|
|
55
55
|
|
|
56
56
|
Args:
|
|
57
57
|
op_summary: op data
|
|
58
58
|
steptrace: step data
|
|
59
|
+
pretty: whether to format json file
|
|
59
60
|
"""
|
|
60
61
|
|
|
61
62
|
logger.info('Initiating timeline...')
|
|
62
|
-
|
|
63
|
+
self._pretty = pretty
|
|
64
|
+
if op_summary.size < 1:
|
|
65
|
+
return
|
|
63
66
|
timeline_list = op_summary[~np.isin(op_summary['Task Type'], ['AI_CPU', 'HCCL'])][
|
|
64
67
|
['Op Name', 'Stream ID', 'Task Start Time', 'Task Duration']]
|
|
65
68
|
|
|
@@ -102,6 +102,12 @@ class BaseTimelineGenerator:
|
|
|
102
102
|
self._model = model
|
|
103
103
|
self._step_start_op_name = ""
|
|
104
104
|
self._step_end_op_name = ""
|
|
105
|
+
self._pretty = False
|
|
106
|
+
|
|
107
|
+
@property
def indent(self):
    """Return 1 when the ``_pretty`` flag is truthy, otherwise None."""
    if self._pretty:
        return 1
    return None
|
|
105
111
|
|
|
106
112
|
@staticmethod
|
|
107
113
|
def get_parallel_context():
|
|
@@ -201,7 +207,9 @@ class BaseTimelineGenerator:
|
|
|
201
207
|
with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
202
208
|
json_file.write('[')
|
|
203
209
|
for _, item in enumerate(self._timeline_meta):
|
|
204
|
-
json.
|
|
210
|
+
item_json = json.dumps([item], indent=self.indent)
|
|
211
|
+
item_json = item_json.lstrip('[').rstrip('\n]')
|
|
212
|
+
json_file.write(item_json)
|
|
205
213
|
if "scope_level" in item.keys():
|
|
206
214
|
self._max_scope_name_num = max(
|
|
207
215
|
self._max_scope_name_num, item["scope_level"] + 1)
|
|
@@ -209,7 +217,7 @@ class BaseTimelineGenerator:
|
|
|
209
217
|
json_file.write(',')
|
|
210
218
|
if file_size > size_limit:
|
|
211
219
|
break
|
|
212
|
-
label_name_json = json.dumps(self.get_thread_label_name())
|
|
220
|
+
label_name_json = json.dumps(self.get_thread_label_name(), indent=self.indent)
|
|
213
221
|
label_name_json = label_name_json.lstrip('[')
|
|
214
222
|
json_file.write(label_name_json)
|
|
215
223
|
os.chmod(display_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
@@ -230,7 +238,7 @@ class BaseTimelineGenerator:
|
|
|
230
238
|
try:
|
|
231
239
|
with os.fdopen(os.open(timeline_summary_file_path,
|
|
232
240
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
233
|
-
json.dump(self._timeline_summary, json_file)
|
|
241
|
+
json.dump(self._timeline_summary, json_file, indent=self.indent)
|
|
234
242
|
except (IOError, OSError) as err:
|
|
235
243
|
logger.critical('Error occurred when write timeline summary file: %s', err)
|
|
236
244
|
raise ProfilerIOException() from err
|
|
@@ -542,8 +542,9 @@ class CpuTimelineGenerator(GpuTimelineGenerator):
|
|
|
542
542
|
|
|
543
543
|
return timeline_list
|
|
544
544
|
|
|
545
|
-
def init_timeline(self):
|
|
545
|
+
def init_timeline(self, pretty=False):
|
|
546
546
|
"""Init timeline metadata, adding all collected info."""
|
|
547
|
+
self._pretty = pretty
|
|
547
548
|
timeline_list = self._load_timeline_data()
|
|
548
549
|
|
|
549
550
|
# Init a dict for counting the num of streams.
|