mindspore-2.2.0-cp38-cp38-manylinux1_x86_64.whl → mindspore-2.2.11-cp38-cp38-manylinux1_x86_64.whl

This diff shows the contents of publicly available package versions as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two released versions.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (170)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_akg/akg/composite/build_module.py +104 -20
  3. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  4. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  5. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  6. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  7. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  8. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  9. mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
  10. mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
  11. mindspore/_akg/akg/utils/kernel_exec.py +41 -15
  12. mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
  13. mindspore/_akg/akg/utils/util.py +56 -1
  14. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  15. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  16. mindspore/_checkparam.py +3 -3
  17. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  18. mindspore/_extends/graph_kernel/splitter.py +3 -2
  19. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
  20. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
  21. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  22. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
  23. mindspore/_extends/parse/__init__.py +3 -2
  24. mindspore/_extends/parse/parser.py +6 -1
  25. mindspore/_extends/parse/standard_method.py +14 -11
  26. mindspore/_extends/remote/kernel_build_server.py +2 -1
  27. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  28. mindspore/bin/cache_admin +0 -0
  29. mindspore/bin/cache_server +0 -0
  30. mindspore/common/_utils.py +16 -0
  31. mindspore/common/api.py +1 -1
  32. mindspore/common/auto_dynamic_shape.py +81 -85
  33. mindspore/common/dump.py +1 -1
  34. mindspore/common/tensor.py +3 -20
  35. mindspore/config/op_info.config +1 -1
  36. mindspore/context.py +11 -4
  37. mindspore/dataset/engine/cache_client.py +8 -5
  38. mindspore/dataset/engine/datasets_standard_format.py +5 -0
  39. mindspore/dataset/vision/transforms.py +21 -21
  40. mindspore/experimental/optim/adam.py +1 -1
  41. mindspore/gen_ops.py +1 -1
  42. mindspore/include/api/model.h +17 -0
  43. mindspore/include/api/status.h +8 -3
  44. mindspore/lib/libdnnl.so.2 +0 -0
  45. mindspore/lib/libmindspore.so +0 -0
  46. mindspore/lib/libmindspore_backend.so +0 -0
  47. mindspore/lib/libmindspore_common.so +0 -0
  48. mindspore/lib/libmindspore_core.so +0 -0
  49. mindspore/lib/libmindspore_glog.so.0 +0 -0
  50. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  51. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  52. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  53. mindspore/lib/libmindspore_shared_lib.so +0 -0
  54. mindspore/lib/libnnacl.so +0 -0
  55. mindspore/lib/libopencv_core.so.4.5 +0 -0
  56. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  57. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  58. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  59. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  60. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  61. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  62. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  63. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  64. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  65. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  66. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  67. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  68. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  69. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  70. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  71. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  72. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  73. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
  74. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  75. mindspore/lib/plugin/ascend/libakg.so +0 -0
  76. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  77. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  78. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  79. mindspore/lib/plugin/cpu/libakg.so +0 -0
  80. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  81. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  82. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  83. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  84. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  85. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  86. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  87. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  88. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  89. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  90. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  91. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  92. mindspore/nn/cell.py +0 -3
  93. mindspore/nn/layer/activation.py +4 -5
  94. mindspore/nn/layer/conv.py +39 -23
  95. mindspore/nn/layer/flash_attention.py +54 -129
  96. mindspore/nn/layer/math.py +3 -7
  97. mindspore/nn/layer/rnn_cells.py +5 -5
  98. mindspore/nn/wrap/__init__.py +4 -2
  99. mindspore/nn/wrap/cell_wrapper.py +12 -3
  100. mindspore/numpy/utils_const.py +5 -5
  101. mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
  102. mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
  103. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
  104. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  105. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  106. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  107. mindspore/ops/_utils/utils.py +2 -0
  108. mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
  109. mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
  110. mindspore/ops/function/array_func.py +10 -7
  111. mindspore/ops/function/grad/grad_func.py +0 -1
  112. mindspore/ops/function/nn_func.py +98 -9
  113. mindspore/ops/function/random_func.py +2 -1
  114. mindspore/ops/op_info_register.py +24 -21
  115. mindspore/ops/operations/__init__.py +6 -2
  116. mindspore/ops/operations/_grad_ops.py +25 -6
  117. mindspore/ops/operations/_inner_ops.py +155 -23
  118. mindspore/ops/operations/array_ops.py +9 -7
  119. mindspore/ops/operations/comm_ops.py +2 -2
  120. mindspore/ops/operations/custom_ops.py +85 -68
  121. mindspore/ops/operations/inner_ops.py +26 -3
  122. mindspore/ops/operations/math_ops.py +7 -6
  123. mindspore/ops/operations/nn_ops.py +193 -49
  124. mindspore/parallel/_parallel_serialization.py +10 -3
  125. mindspore/parallel/_tensor.py +4 -1
  126. mindspore/parallel/checkpoint_transform.py +13 -2
  127. mindspore/parallel/shard.py +17 -10
  128. mindspore/profiler/common/util.py +1 -0
  129. mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
  130. mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
  131. mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
  132. mindspore/profiler/parser/ascend_op_generator.py +1 -1
  133. mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
  134. mindspore/profiler/parser/base_timeline_generator.py +1 -1
  135. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
  136. mindspore/profiler/parser/framework_parser.py +1 -1
  137. mindspore/profiler/parser/profiler_info.py +19 -0
  138. mindspore/profiler/profiling.py +46 -24
  139. mindspore/rewrite/api/pattern_engine.py +1 -1
  140. mindspore/rewrite/parsers/for_parser.py +7 -7
  141. mindspore/rewrite/parsers/module_parser.py +4 -4
  142. mindspore/rewrite/symbol_tree.py +1 -4
  143. mindspore/run_check/_check_version.py +5 -3
  144. mindspore/safeguard/rewrite_obfuscation.py +52 -28
  145. mindspore/scipy/ops.py +55 -5
  146. mindspore/scipy/optimize/__init__.py +3 -2
  147. mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
  148. mindspore/train/callback/_summary_collector.py +1 -1
  149. mindspore/train/dataset_helper.py +1 -0
  150. mindspore/train/model.py +2 -2
  151. mindspore/train/serialization.py +97 -11
  152. mindspore/train/summary/_summary_adapter.py +1 -1
  153. mindspore/train/summary/summary_record.py +23 -7
  154. mindspore/version.py +1 -1
  155. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
  156. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
  157. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
  158. mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
  159. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
  160. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
  161. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
  162. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
  163. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  164. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  165. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  166. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  167. /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
  168. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
  169. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
  170. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/profiler/parser/ascend_hccl_generator.py

@@ -82,6 +82,238 @@ def count_average(data):
 class AscendHCCLGenerator:
     """Generate ascend hccl data from files."""

+    def __init__(self, source_path, steptrace):
+        self.root_path = source_path
+        self.steptrace = steptrace
+        self.hccl_raw = []
+        self.hccl_data_df = np.dtype(
+            [('model_id', int), ('iteration_id', int), ('name', object), ('pid', int), ('tid', int), ('ts', float),
+             ('te', float), ('dur', float), ('ph', object),
+             ('task_type', object), ('link_info', object), ('transport_type', object), ('size', int), ('tag', object)])
+
+    @staticmethod
+    def _cost_analyse(iteration):
+        """analyse communication cost and wait cost"""
+        communication_cost = np.sum(iteration[iteration['name'] != 'Notify_Wait']['dur'])
+        wait_cost = np.sum(iteration[iteration['name'] == 'Notify_Wait']['dur'])
+        return communication_cost, wait_cost
+
+    @staticmethod
+    def _rdma_analyse(groupby_transport):
+        """rdma analyse"""
+        thread_groups = np.unique(groupby_transport['tid'])
+        thread_information = []
+        for thread_index in thread_groups:
+            groupby_thread = groupby_transport[groupby_transport['tid'] == thread_index]
+            rdma_communication_time = 0
+            rdma_communication_size = 0
+            rdma_communication_wait_time = 0
+            start_index = 0
+            end_index = groupby_thread.size - 2
+            while start_index < end_index:
+                first_task_type = groupby_thread[start_index]['task_type']
+                if first_task_type == 'RDMASend':
+                    second_index = start_index + 1
+                    third_index = start_index + 2
+                    second_task_type = groupby_thread[second_index]['task_type']
+                    third_task_type = groupby_thread[third_index]['task_type']
+                    if second_task_type == 'RDMASend' and third_task_type == 'Notify Wait':
+                        rdma_send_cost = groupby_thread[start_index]['dur']
+                        notify_record_cost = groupby_thread[second_index]['dur']
+                        notify_wait_cost = groupby_thread[third_index]['dur']
+                        rdma_communication_time += rdma_send_cost + notify_record_cost + notify_wait_cost
+                        rdma_communication_wait_time += notify_wait_cost
+                        rdma_communication_size += groupby_thread[start_index]['size'] + groupby_thread[second_index][
+                            'size']
+                        start_index += 2
+                start_index += 1
+            rdma_communication_wait_time = rdma_communication_wait_time / 1e3
+            rdma_communication_size = rdma_communication_size / 1e3
+            rdma_communication_time = rdma_communication_time / 1e3
+            rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3) \
+                if rdma_communication_size else 0
+            thread_information.append(
+                [rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time])
+        if len(thread_information) > 1:
+            thread_information = np.sum(thread_information, axis=0).tolist()
+
+        return thread_information
+
+    def parse(self):
+        """Analyse the original hccl data generator hccl data."""
+        hccl_data = []
+        for hccl_file in find_files(self.root_path, "hccl_*.json"):
+            with open(hccl_file) as fr:
+                hccl_data.append(self._original_data_analyse(json.load(fr)))
+        hccl_data = np.concatenate(hccl_data)
+
+        for model_id in np.unique(hccl_data['model_id']):
+            hccl_data_model = hccl_data[hccl_data['model_id'] == model_id]
+            for iteration_id in np.unique(hccl_data_model['iteration_id']):
+                hccl_data_model_iteration = hccl_data_model[hccl_data_model['iteration_id'] == iteration_id]
+
+                hccl_abstract_data = hccl_data_model_iteration[hccl_data_model_iteration['task_type'] == '']
+                hccl_detail_data = hccl_data_model_iteration[hccl_data_model_iteration['task_type'] != '']
+                hccl_abstract_data = np.sort(hccl_abstract_data, order='ts')
+                hccl_detail_data = np.sort(hccl_detail_data, order='ts')
+
+                tag = np.searchsorted(hccl_abstract_data['ts'], hccl_detail_data['ts'], side='right') - 1
+
+                hccl_detail_data['tag'] = [x[-1] for x in
+                                           np.char.split(hccl_abstract_data[tag]['name'].astype(str), sep='/')]
+
+                self.hccl_raw.append(self._iteration_analyse(hccl_detail_data, iteration_id))
+
+        self.hccl_raw = sorted(self.hccl_raw, key=lambda x: x[0])
+        self.hccl_raw.append(copy.deepcopy(self.hccl_raw[-1]))
+        self.hccl_raw[-1][0] = '-'
+        for _, value in self.hccl_raw[-1][4].items():
+            value[0] = '-'
+
+    def write(self, hccl_raw_path):
+        """
+        Write the flops.csv and flops_summary.json
+
+        Args:
+            hccl_raw_path(str): hccl_raw.csv path.
+        """
+        try:
+            with os.fdopen(os.open(hccl_raw_path,
+                                   os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR), 'w',
+                           newline='') as hccl_row:
+                writer = csv.writer(hccl_row)
+                writer.writerow(
+                    ['step_num', 'communication_cost', 'wait_cost', 'link_info', 'communication_operator_cost'])
+                for row in self.hccl_raw:
+                    row[3] = json.dumps(row[3])
+                    row[4] = json.dumps(row[4])
+                writer.writerows(self.hccl_raw)
+        except (IOError, OSError) as err:
+            logging.critical('Error occurred when write aicore detail file: %s', err)
+            raise ProfilerIOException() from err
+        if os.path.exists(hccl_raw_path):
+            os.chmod(hccl_raw_path, stat.S_IREAD | stat.S_IWRITE)
+
+    def _original_data_analyse(self, original_data):
+        """analyse original data"""
+
+        groups_steptrace = {model_id: np.sort(self.steptrace[self.steptrace['Model ID'] == model_id],
+                                              order='Iteration ID')
+                            for model_id in np.unique(self.steptrace['Model ID'])}
+
+        target_data = []
+        for row in original_data:
+            model_id = row.get('args', {}).get('model id')
+            if row.get('ph') == 'X' and model_id is not None:
+                name = row.get('name')
+                pid = row.get('pid')
+                tid = row.get('tid')
+                ts = row.get('ts')
+                dur = row.get('dur')
+                te = ts + dur
+                ph = row.get('ph')
+                task_type = row.get('args', {}).get('task type', '')
+                src_rank = row.get('args', {}).get('src rank', 0)
+                dst_rank = row.get('args', {}).get('dst rank', 0)
+                if src_rank == int('0xffffffff', 16):
+                    src_rank = dst_rank
+                if dst_rank == int('0xffffffff', 16):
+                    dst_rank = src_rank
+                transport_type = row.get('args', {}).get('transport type', '')
+                if transport_type == 'LOCAL':
+                    src_rank, dst_rank = dst_rank, src_rank
+                link_info = str(src_rank) + '-' + str(dst_rank)
+                size = row.get('args', {}).get('size(Byte)', 0)
+                size = size if isinstance(size, int) else int(size, 16)
+                steptrace = groups_steptrace.get(model_id, None)
+                if steptrace is None:
+                    logging.warning('Could not find model: %s in hccl json, skip.', model_id)
+                    continue
+                tag = np.searchsorted(steptrace['Iteration End'], te * 1e-3, side='left')
+                iteration_id = steptrace[tag]['Iteration ID']
+                target_data.append(
+                    tuple([model_id, iteration_id, name, pid, tid,
+                           ts, te, dur, ph, task_type,
+                           link_info, transport_type, size, -1]))
+
+        hccl_data = np.array(target_data, dtype=self.hccl_data_df)
+
+        return hccl_data
+
+    def _iteration_analyse(self, hccl_detail_data, iteration):
+        """analyse data by iteration """
+        communication_cost, wait_cost = self._cost_analyse(hccl_detail_data)
+        link_info = self._link_info_analyse(hccl_detail_data)
+        communication_operator_cost = self._communication_operator_cost_analyse(hccl_detail_data, iteration)
+        return [iteration, communication_cost, wait_cost, link_info, communication_operator_cost]
+
+    def _link_info_analyse(self, hccl_detail_data):
+        """analyse link info data"""
+        groupby_iteration = hccl_detail_data[hccl_detail_data['task_type'] != 'Notify Record']
+        link_info_groups = np.unique(groupby_iteration['link_info'])
+        link_info_information = dict()
+        for link_info_index in link_info_groups:
+            groupby_link_info = groupby_iteration[groupby_iteration['link_info'] == link_info_index]
+            transport_groups = np.unique(groupby_iteration['transport_type'])
+            transport_information = dict()
+            for transport_index in transport_groups:
+                groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
+                if transport_index == 'SDMA' and groupby_transport.size > 0:
+                    groupby_sdma = \
+                        groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])][
+                            ['dur', 'size']]
+                    sdma_communication_time = np.sum(groupby_sdma['dur']) * 1e-3
+                    sdma_communication_size = np.sum(groupby_sdma['size']) * 1e-3
+                    sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
+                        if sdma_communication_time != 0 else 0
+                    transport_information['SDMA'] = [sdma_communication_time, sdma_communication_size, sdma_bandwidth]
+                elif transport_index == 'RDMA' and groupby_transport.size > 0:
+                    transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
+            link_info_information[link_info_index] = transport_information
+        return link_info_information
+
+    def _communication_operator_cost_analyse(self, hccl_detail_data, iteration_index):
+        """analyse communication operator cost"""
+        groupby_iteration = hccl_detail_data[hccl_detail_data['task_type'] != 'Notify Record']
+        tag_groups = np.unique(groupby_iteration['tag'])
+        tag_information = dict()
+        for tag_index in tag_groups:
+            groupby_tag = groupby_iteration[groupby_iteration['tag'] == tag_index]
+            link_groups = np.unique(groupby_iteration['link_info'])
+            link_info_information = dict()
+            for link_info_index in link_groups:
+                groupby_link_info = groupby_tag[groupby_tag['link_info'] == link_info_index]
+                transport_groups = np.unique(groupby_link_info['transport_type'])
+                transport_information = dict()
+                for transport_index in transport_groups:
+                    groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
+                    if transport_index == 'SDMA':
+                        groupby_sdma = \
+                            groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])][
+                                ['dur', 'size']]
+                        sdma_communication_time = np.sum(groupby_sdma['dur']) * 1e-3
+                        sdma_communication_size = np.sum(groupby_sdma['size']) * 1e-3
+                        sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
+                            if sdma_communication_time != 0 else 0
+                        transport_information['SDMA'] = [
+                            sdma_communication_time, sdma_communication_size,
+                            sdma_bandwidth
+                        ]
+                    elif transport_index == 'RDMA':
+                        transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
+                link_info_information[link_info_index] = transport_information
+            communication_cost = np.sum(groupby_tag[groupby_tag['name'] != 'Notify_Wait']['dur'])
+            wait_cost = np.sum(groupby_tag[groupby_tag['name'] == 'Notify_Wait']['dur'])
+            tag_information[tag_index] = [
+                str(iteration_index), communication_cost, wait_cost,
+                link_info_information
+            ]
+        return tag_information
+
+
+class AscendHCCLGeneratorOld:
+    """Generate ascend hccl data from files."""
+
     def __init__(self, source_path):
         self.root_path = source_path
         self.hccl_raw = []
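
A note on the tagging step in the new AscendHCCLGenerator.parse() above: abstract events (task_type == '') are sorted by start time, and np.searchsorted with side='right' minus one picks, for each detail event, the last abstract event that started at or before it; the operator tag is the last path component of that abstract event's name. A minimal sketch of that lookup, with made-up timestamps and names that are not taken from the diff:

import numpy as np

abstract_ts = np.array([0.0, 10.0, 25.0])        # start times of abstract (operator-level) events
abstract_name = np.array(['hccl/AllReduce-op1', 'hccl/AllGather-op2', 'hccl/AllReduce-op3'])
detail_ts = np.array([1.5, 12.0, 26.0, 27.5])    # start times of detail (task-level) events

# For each detail event, index of the last abstract event whose start time is <= its own.
idx = np.searchsorted(abstract_ts, detail_ts, side='right') - 1
# The operator tag is the last path component of the abstract event's name.
tags = [name.split('/')[-1] for name in abstract_name[idx]]
print(tags)  # ['AllReduce-op1', 'AllGather-op2', 'AllReduce-op3', 'AllReduce-op3']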
mindspore/profiler/parser/ascend_msprof_exporter.py

@@ -15,6 +15,8 @@
 """msprof PROF data export api file"""
 import os
 import shutil
+import json
+from json import JSONDecodeError
 from collections import defaultdict
 from subprocess import CalledProcessError, TimeoutExpired
 from subprocess import Popen, PIPE
@@ -39,7 +41,7 @@ class AscendMsprofExporter:
     >> ms_exporter = AscendMsprofExporter("path/to/profiler/data")
     >> ms_exporter.export(start_time)
     """
-
+    DRV_VERSION = 467473
     _hiai_msprof_tail = "Ascend/latest/tools/profiler/bin"
     _msprof_cmd = "msprof"
     _ascend_mark = "Ascend"
@@ -49,46 +51,79 @@ class AscendMsprofExporter:
     _op_summary_mark = "op_summary"
     _op_statistic_mark = "op_statistic"

-    def __init__(self, source_path, time_out=3000):
+    def __init__(self, source_path, time_out=3600):
         self._time_out = time_out
         self.source_path = source_path
-        self.prof_root_dir = os.path.abspath(os.path.join(self.source_path, os.path.pardir))
+        self.prof_root_dir = os.path.abspath(os.path.join(self.source_path, os.path.pardir))  # PROF*/

         self._check_msprof_env()

+    def get_drv_version(self):
+        """Get the drv_version for choosing the export mode."""
+        host_dir = os.path.join(self.prof_root_dir, 'host')
+        cmd = ['python',
+               '/usr/local/Ascend/latest/tools/profiler/profiler_tool/analysis/interface/get_msprof_info.py',
+               '-dir', host_dir]
+        try:
+            outs, _ = self._run_cmd(cmd)
+            if not outs:
+                logger.warning('Check the drvVersion can`t find the result, use single export mode instead.')
+                return False
+            result = json.loads(outs)
+            logger.info('get drv_version result is : %s', result)
+            status = result.get('status', 1)
+            if status == 1:
+                return False
+            drv_version = result.get('data', {}).get('version_info', {}).get('drv_version', 0)
+            if drv_version >= self.DRV_VERSION:
+                return True
+            return False
+        except (RuntimeError, JSONDecodeError, AttributeError) as err:
+            logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
+            return False
+
     def export(self, model_iteration_dict=None):
         """start_time is the time to collect PROF data"""

-        if not model_iteration_dict:
-            model_iteration_dict = self._generate_step_trace(self.prof_root_dir, self.source_path)
+        flag = self.get_drv_version()
+        if not flag or model_iteration_dict:
+            flag = False
+            if not model_iteration_dict:
+                model_iteration_dict = self._generate_step_trace(self.prof_root_dir, self.source_path)

-        if model_iteration_dict:
-            for model_id, value in model_iteration_dict.items():
-                for iteration_id in value:
-                    msprof_export_cmd = self._msprof_command_generator(self.prof_root_dir, model_id, iteration_id)
-                    self._run_cmd(msprof_export_cmd)
-        self._check_export_files(self.source_path, model_iteration_dict)
+            if model_iteration_dict:
+                for model_id, value in model_iteration_dict.items():
+                    for iteration_id in value:
+                        msprof_export_cmd = self._msprof_command_generator_old(self.prof_root_dir, model_id,
+                                                                               iteration_id)
+                        self._run_cmd(msprof_export_cmd)

-    def _run_cmd(self, cmd, raise_error=True):
+            self._check_export_files_old(self.source_path, model_iteration_dict)
+        else:
+            msprof_export_cmd = self._msprof_command_generator(self.prof_root_dir)
+            self._run_cmd(msprof_export_cmd)
+            self._check_export_files(self.source_path)
+
+        return flag
+
+    def _run_cmd(self, cmd):
         """run shell command"""
         try:
             proc = Popen(cmd, stdout=PIPE, stderr=PIPE, text=True)
         except (FileNotFoundError, PermissionError, CalledProcessError) as exc:
-            raise RuntimeError(exc)
+            raise RuntimeError(exc) from exc
         try:
             outs, errs = proc.communicate(timeout=self._time_out)
-        except TimeoutExpired:
+        except TimeoutExpired as err:
             proc.kill()
             msg = "The possible cause is that too much data is collected " \
                   "and the export time is too long."
             logger.error(msg)
-            raise TimeoutError(msg)
+            raise TimeoutError(msg) from err
         logger.info(outs)
-        if raise_error and errs != "":
-            raise RuntimeError(errs)
-        return outs
+        return outs, errs

-    def _msprof_command_generator(self, output, model_id=None, iter_id=None):
+    def _msprof_command_generator_old(self, output, model_id=None, iter_id=None):
         """msprof export helper"""
         export_cmd = [self._msprof_cmd, "--export=on", "--output={}".format(output)]
         if isinstance(model_id, int) and model_id >= 0:
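
For orientation, the export flow above now picks between two modes: if the installed driver reports a drv_version at or above DRV_VERSION, a single msprof --export=on run covers the whole PROF directory; otherwise it falls back to the old per-model, per-iteration export, and export() returns which mode was used. A hypothetical call site, following the class docstring (the path is a placeholder, not taken from the diff):

# Hypothetical usage sketch.
exporter = AscendMsprofExporter("path/to/profiler/data")
bulk_export_used = exporter.export()  # True when the whole-PROF export path was taken
if not bulk_export_used:
    print("fell back to per-iteration msprof export")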
@@ -97,6 +132,10 @@ class AscendMsprofExporter:
             export_cmd.append("--iteration-id={}".format(iter_id))
         return export_cmd

+    def _msprof_command_generator(self, output):
+        """msprof export helper"""
+        return [self._msprof_cmd, "--export=on", "--output={}".format(output)]
+
     def _check_msprof_env(self):
         """Check the existence of msprof binary tool"""

@@ -109,7 +148,7 @@ class AscendMsprofExporter:
             return False

         msprof_cmd = ["which", self._msprof_cmd]
-        outs = self._run_cmd(msprof_cmd, raise_error=False)
+        outs, _ = self._run_cmd(msprof_cmd)
         if outs != "":
             return
         logger.warning("[Profiler]The msprof command was not found. Searching from environment variables...")
@@ -143,8 +182,7 @@ class AscendMsprofExporter:
         summary_path = os.path.join(device_path, self._summary_dir)
         timeline_path = os.path.join(device_path, self._timeline_dir)

-        msprof_export_cmd = self._msprof_command_generator(prof_path)
-        self._run_cmd(msprof_export_cmd)
+        self._run_cmd(self._msprof_command_generator_old(prof_path))

         if not os.path.isdir(summary_path):
             msg = "Path {} is not a existing directory. Make sure there is " \
@@ -158,16 +196,15 @@ class AscendMsprofExporter:
             return None

         step_trace = defaultdict(list)
-        with open(step_trace_file, newline='', mode='r') as csvfile:
+        with os.fdopen(os.open(step_trace_file, os.O_RDONLY, 0o600), newline='', mode='r') as csvfile:
             reader = csv.reader(csvfile, delimiter=',', quotechar='"')
-            header = next(reader)
-            for index, value in enumerate(header):
+            for index, value in enumerate(next(reader)):
                 if value == 'Model ID':
-                    Model_ID = index
+                    model_id = index
                 if value == 'Iteration ID':
-                    Iteration_ID = index
+                    iteration_id = index
             for row in reader:
-                step_trace[int(row[Model_ID])].append(int(row[Iteration_ID]))
+                step_trace[int(row[model_id])].append(int(row[iteration_id]))

         if os.path.isdir(summary_path):
             shutil.rmtree(summary_path)
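
Both the CSV reader above and AscendHCCLGenerator.write() in the first hunk switch from the built-in open() to the os.fdopen(os.open(...)) idiom, which pins the access flags (and, when a file is created, its permission bits) before a Python file object is wrapped around the descriptor. A minimal sketch of the idiom; the helper names are illustrative and not part of the diff:

import os
import stat

def open_readonly(path):
    # O_RDONLY guarantees the handle cannot be written through; the mode argument
    # only applies if the file were being created.
    return os.fdopen(os.open(path, os.O_RDONLY, 0o600), mode='r', newline='')

def open_for_rewrite(path):
    # Create/truncate with owner read+write only, then wrap the descriptor for csv writing.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR)
    return os.fdopen(fd, mode='w', newline='')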
@@ -176,16 +213,15 @@ class AscendMsprofExporter:

         return step_trace

-    def _check_export_files(self, source_path, step_trace):
+    def _check_export_files_old(self, source_path, step_trace):
         """Check the existence of op_summary & op_statistic files."""
         summary_path = os.path.join(source_path, self._summary_dir)
         if not os.path.isdir(summary_path):
             raise RuntimeError("Path {} is not a existing directory.".format(summary_path))
-        summary_file_list = os.listdir(summary_path)
         op_summary = set()
         op_statistic = set()

-        for summary_file in summary_file_list:
+        for summary_file in os.listdir(summary_path):
             if summary_file.startswith(self._op_summary_mark):
                 op_summary.add(summary_file)
             elif summary_file.startswith(self._op_statistic_mark):
@@ -196,18 +232,25 @@ class AscendMsprofExporter:
         if not op_statistic:
             raise RuntimeError("The op_statistics file was not found, perhaps the original data was not collected.")

-        device_id = source_path.split('_')[-1].replace("/", "")
+        logger.info("Finish checking files.")

-        for model_id, value in step_trace.items():
-            for iteration_id in value:
-                tag = f"_{device_id}_{model_id}_{iteration_id}.csv"
-                op_summary_file_name = self._op_summary_mark + tag
-                op_statistic_file = self._op_statistic_mark + tag
-                if op_summary_file_name not in op_summary:
-                    logger.warning("[Profiler]The file {} was not found, " \
-                                   "perhaps the original data was not collected.".format(op_summary_file_name))
-                if op_statistic_file not in op_statistic:
-                    logger.warning("[Profiler]The file {} was not found, " \
-                                   "perhaps the original data was not collected.".format(op_statistic_file))
+    def _check_export_files(self, source_path):
+        """Check the existence of op_summary & op_statistic files."""
+        summary_path = os.path.join(source_path, self._summary_dir)
+        if not os.path.isdir(summary_path):
+            raise RuntimeError("Path {} is not a existing directory.".format(summary_path))
+        summary_file_list = os.listdir(summary_path)
+        op_summary = set()
+        op_statistic = set()
+
+        for summary_file in summary_file_list:
+            if summary_file.startswith(self._op_summary_mark):
+                op_summary.add(summary_file)
+            elif summary_file.startswith(self._op_statistic_mark):
+                op_statistic.add(summary_file)

+        if not op_summary:
+            raise RuntimeError("The op_summary file was not found, perhaps the original data was not collected.")
+        if not op_statistic:
+            raise RuntimeError("The op_statistics file was not found, perhaps the original data was not collected.")
         logger.info("Finish checking files.")