mindstudio-probe 8.2.0__py3-none-any.whl → 8.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {mindstudio_probe-8.2.0.dist-info → mindstudio_probe-8.2.1.dist-info}/METADATA +2 -2
  2. {mindstudio_probe-8.2.0.dist-info → mindstudio_probe-8.2.1.dist-info}/RECORD +63 -61
  3. msprobe/README.md +4 -4
  4. msprobe/core/common/const.py +6 -0
  5. msprobe/core/common/db_manager.py +35 -4
  6. msprobe/core/common/file_utils.py +28 -5
  7. msprobe/core/common/megatron_utils.py +59 -0
  8. msprobe/core/common/utils.py +14 -3
  9. msprobe/core/compare/diff_analyze/first_diff_analyze.py +16 -4
  10. msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
  11. msprobe/core/compare/find_first/analyzer.py +8 -7
  12. msprobe/core/compare/find_first/graph.py +11 -3
  13. msprobe/core/compare/find_first/utils.py +3 -2
  14. msprobe/core/compare/highlight.py +13 -6
  15. msprobe/core/compare/multiprocessing_compute.py +17 -10
  16. msprobe/core/compare/utils.py +14 -5
  17. msprobe/core/data_dump/data_collector.py +18 -21
  18. msprobe/core/data_dump/data_processor/pytorch_processor.py +43 -20
  19. msprobe/core/data_dump/json_writer.py +18 -8
  20. msprobe/core/data_dump/scope.py +4 -6
  21. msprobe/core/hook_manager.py +21 -0
  22. msprobe/core/service.py +2 -0
  23. msprobe/core/single_save/single_comparator.py +16 -3
  24. msprobe/docs/01.installation.md +7 -5
  25. msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
  26. msprobe/docs/06.data_dump_MindSpore.md +1 -1
  27. msprobe/docs/10.accuracy_compare_PyTorch.md +46 -5
  28. msprobe/docs/14.data_parse_PyTorch.md +1 -1
  29. msprobe/docs/19.monitor.md +2 -0
  30. msprobe/docs/21.visualization_PyTorch.md +15 -80
  31. msprobe/docs/22.visualization_MindSpore.md +20 -104
  32. msprobe/docs/23.generate_operator_PyTorch.md +1 -1
  33. msprobe/docs/26.data_dump_PyTorch_baseline.md +7 -7
  34. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  35. msprobe/docs/img/visualization/vis_match_info.png +0 -0
  36. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  37. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  38. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  39. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  40. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  41. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +1 -1
  42. msprobe/mindspore/cell_processor.py +33 -5
  43. msprobe/mindspore/compare/common_dir_compare.py +22 -26
  44. msprobe/mindspore/debugger/precision_debugger.py +1 -1
  45. msprobe/mindspore/dump/cell_dump_process.py +73 -62
  46. msprobe/mindspore/dump/graph_mode_cell_dump.py +21 -10
  47. msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +2 -0
  48. msprobe/pytorch/compare/utils.py +2 -1
  49. msprobe/pytorch/dump/module_dump/hook_wrapper.py +10 -7
  50. msprobe/pytorch/dump/module_dump/module_processer.py +15 -8
  51. msprobe/pytorch/monitor/module_hook.py +28 -9
  52. msprobe/pytorch/online_dispatch/dispatch.py +42 -24
  53. msprobe/visualization/builder/graph_builder.py +169 -64
  54. msprobe/visualization/builder/graph_merger.py +0 -1
  55. msprobe/visualization/builder/msprobe_adapter.py +1 -1
  56. msprobe/visualization/db_utils.py +25 -2
  57. msprobe/visualization/graph/base_node.py +0 -24
  58. msprobe/visualization/graph/graph.py +5 -14
  59. msprobe/visualization/graph_service.py +29 -53
  60. {mindstudio_probe-8.2.0.dist-info → mindstudio_probe-8.2.1.dist-info}/LICENSE +0 -0
  61. {mindstudio_probe-8.2.0.dist-info → mindstudio_probe-8.2.1.dist-info}/WHEEL +0 -0
  62. {mindstudio_probe-8.2.0.dist-info → mindstudio_probe-8.2.1.dist-info}/entry_points.txt +0 -0
  63. {mindstudio_probe-8.2.0.dist-info → mindstudio_probe-8.2.1.dist-info}/top_level.txt +0 -0
@@ -88,11 +88,7 @@ def _export_compare_graph_result(args, result):
88
88
  graphs = [result.graph_n, result.graph_b]
89
89
  graph_comparator = result.graph_comparator
90
90
  micro_steps = result.micro_steps
91
- output_file_name = result.output_file_name
92
- if not output_file_name:
93
- output_file_name = f'compare_{current_time}.vis'
94
- logger.info(f'Start exporting compare graph result, file name: {output_file_name}...')
95
- output_path = os.path.join(args.output_path, output_file_name)
91
+ logger.info(f'Start exporting compare graph result, file name: {compare_output_db_name}...')
96
92
  output_db_path = os.path.join(args.output_path, compare_output_db_name)
97
93
  task = GraphConst.GRAPHCOMPARE_MODE_TO_DUMP_MODE_TO_MAPPING.get(graph_comparator.ma.compare_mode)
98
94
  export_config = GraphExportConfig(graphs[0], graphs[1], graph_comparator.ma.get_tool_tip(),
@@ -101,13 +97,12 @@ def _export_compare_graph_result(args, result):
101
97
  args.step_list if hasattr(args, 'step_list') else [0],
102
98
  args.rank_list if hasattr(args, 'rank_list') else [0])
103
99
  try:
104
- # output_db_path to_db
105
- GraphBuilder.to_json(output_path, export_config)
106
- logger.info(f'Exporting compare graph result successfully, the result file is saved in {output_path}')
100
+ GraphBuilder.to_db(output_db_path, export_config)
101
+ logger.info(f'Exporting compare graph result successfully, the result file is saved in {output_db_path}')
107
102
  return ''
108
103
  except RuntimeError as e:
109
- logger.error(f'Failed to export compare graph result, file: {output_file_name}, error: {e}')
110
- return output_file_name
104
+ logger.error(f'Failed to export compare graph result, file: {compare_output_db_name}, error: {e}')
105
+ return compare_output_db_name
111
106
 
112
107
 
113
108
  def _build_graph_info(dump_path, args, graph=None):
@@ -118,7 +113,7 @@ def _build_graph_info(dump_path, args, graph=None):
118
113
  stack_path = FileChecker(os.path.join(dump_path, GraphConst.STACK_FILE), FileCheckConst.FILE,
119
114
  FileCheckConst.READ_ABLE).common_check()
120
115
  if not graph:
121
- graph = GraphBuilder.build(construct_path, data_path, stack_path, complete_stack=args.complete_stack)
116
+ graph = GraphBuilder.build(construct_path, data_path, stack_path)
122
117
  return GraphInfo(graph, construct_path, data_path, stack_path)
123
118
 
124
119
 
@@ -144,16 +139,14 @@ def _run_build_graph_compare(input_param, args, nr, br):
144
139
  def _run_build_graph_single(dump_ranks_path, rank, step, args):
145
140
  logger.info(f'Start building graph for {rank}...')
146
141
  dump_path = os.path.join(dump_ranks_path, rank)
147
- output_file_name = f'build_{step}_{rank}_{current_time}.vis' if step else f'build_{rank}_{current_time}.vis'
148
142
  result = _build_graph_result(dump_path, args)
149
- result.output_file_name = output_file_name
150
143
  if rank != Const.RANK:
151
144
  result.rank = get_step_or_rank_int(rank, True)
152
145
  logger.info(f'Building graph for step: {step}, rank: {rank} finished.')
153
146
  return result
154
147
 
155
148
 
156
- def _run_graph_compare(graph_task_info, input_param, args, output_file_name):
149
+ def _run_graph_compare(graph_task_info, input_param, args):
157
150
  logger.info(f'Start comparing data for {graph_task_info.npu_rank}...')
158
151
  graph_n = graph_task_info.graph_info_n
159
152
  graph_b = graph_task_info.graph_info_b
@@ -165,7 +158,6 @@ def _run_graph_compare(graph_task_info, input_param, args, output_file_name):
165
158
  graph_n.graph.overflow_check()
166
159
  graph_b.graph.overflow_check()
167
160
  graph_result = CompareGraphResult(graph_n.graph, graph_b.graph, graph_comparator, micro_steps)
168
- graph_result.output_file_name = output_file_name
169
161
  if nr != Const.RANK:
170
162
  graph_result.rank = get_step_or_rank_int(nr, True)
171
163
  logger.info(f'Comparing data for {graph_task_info.npu_rank} finished.')
@@ -177,23 +169,18 @@ def _export_build_graph_result(args, result):
177
169
  graph = result.graph
178
170
  micro_steps = result.micro_steps
179
171
  overflow_check = args.overflow_check
180
- output_file_name = result.output_file_name
181
- if not output_file_name:
182
- output_file_name = f'build_{current_time}.vis'
183
- logger.info(f'Start exporting graph for {output_file_name}...')
184
- output_path = os.path.join(out_path, output_file_name)
172
+ logger.info(f'Start exporting graph for {build_output_db_name}...')
185
173
  output_db_path = os.path.join(out_path, build_output_db_name)
186
174
  config = GraphExportConfig(graph, micro_steps=micro_steps, overflow_check=overflow_check, rank=result.rank,
187
175
  step=result.step, rank_list=args.rank_list if hasattr(args, 'rank_list') else [0],
188
176
  step_list=args.step_list if hasattr(args, 'step_list') else [0])
189
177
  try:
190
- # output_db_path to_db
191
- GraphBuilder.to_json(output_path, config)
192
- logger.info(f'Model graph exported successfully, the result file is saved in {output_path}')
178
+ GraphBuilder.to_db(output_db_path, config)
179
+ logger.info(f'Model graph exported successfully, the result file is saved in {output_db_path}')
193
180
  return None
194
181
  except RuntimeError as e:
195
- logger.error(f'Failed to export model graph, file: {output_file_name}, error: {e}')
196
- return output_file_name
182
+ logger.error(f'Failed to export model graph, file: {build_output_db_name}, error: {e}')
183
+ return build_output_db_name
197
184
 
198
185
 
199
186
  def is_real_data_compare(input_param, npu_ranks, bench_ranks):
@@ -211,9 +198,9 @@ def is_real_data_compare(input_param, npu_ranks, bench_ranks):
211
198
  return has_real_data
212
199
 
213
200
 
214
- def _mp_compare(input_param, serializable_args, output_file_name, nr, br):
201
+ def _mp_compare(input_param, serializable_args, nr, br):
215
202
  graph_task_info = _run_build_graph_compare(input_param, serializable_args, nr, br)
216
- return _run_graph_compare(graph_task_info, input_param, serializable_args, output_file_name)
203
+ return _run_graph_compare(graph_task_info, input_param, serializable_args)
217
204
 
218
205
 
219
206
  def _compare_graph_ranks(input_param, args, step=None):
@@ -266,25 +253,23 @@ def _get_compare_graph_results(input_param, serializable_args, step, pool, err_c
266
253
  for nr, br in zip(npu_ranks, bench_ranks):
267
254
  input_param['npu_path'] = os.path.join(dump_rank_n, nr)
268
255
  input_param['bench_path'] = os.path.join(dump_rank_b, br)
269
- output_file_name = f'compare_{step}_{nr}_{current_time}.vis' if step else f'compare_{nr}_{current_time}.vis'
256
+ build_key = f'{step}_{nr}' if step else f'{nr}'
270
257
  input_param_copy = deepcopy(input_param)
271
- mp_task_dict[output_file_name] = pool.apply_async(_run_build_graph_compare,
272
- args=(input_param_copy, serializable_args, nr, br),
273
- error_callback=err_call)
258
+ mp_task_dict[build_key] = pool.apply_async(_run_build_graph_compare,
259
+ args=(input_param_copy, serializable_args, nr, br),
260
+ error_callback=err_call)
274
261
 
275
262
  mp_res_dict = {k: v.get() for k, v in mp_task_dict.items()}
276
- for output_file_name, mp_res in mp_res_dict.items():
277
- compare_graph_results.append(_run_graph_compare(mp_res, input_param, serializable_args, output_file_name))
263
+ for mp_res in mp_res_dict.values():
264
+ compare_graph_results.append(_run_graph_compare(mp_res, input_param, serializable_args))
278
265
  else:
279
266
  compare_graph_tasks = []
280
267
  for nr, br in zip(npu_ranks, bench_ranks):
281
268
  input_param['npu_path'] = os.path.join(dump_rank_n, nr)
282
269
  input_param['bench_path'] = os.path.join(dump_rank_b, br)
283
- output_file_name = f'compare_{step}_{nr}_{current_time}.vis' if step else f'compare_{nr}_{current_time}.vis'
284
270
  input_param_copy = deepcopy(input_param)
285
271
  compare_graph_tasks.append(pool.apply_async(_mp_compare,
286
- args=(input_param_copy, serializable_args, output_file_name, nr,
287
- br),
272
+ args=(input_param_copy, serializable_args, nr, br),
288
273
  error_callback=err_call))
289
274
  compare_graph_results = [task.get() for task in compare_graph_tasks]
290
275
  if step is not None:
@@ -348,10 +333,7 @@ def _build_graph_ranks(dump_ranks_path, args, step=None):
348
333
  create_directory(args.output_path)
349
334
  export_build_graph_tasks = []
350
335
  serializable_args.rank_list = [result.rank for result in build_graph_results]
351
- for i, result in enumerate(build_graph_results):
352
- if args.parallel_params:
353
- result.output_file_name = f'build_{step}_merged{i}_{current_time}.vis' \
354
- if step else f'build_merged{i}_{current_time}.vis'
336
+ for result in build_graph_results:
355
337
  export_build_graph_tasks.append(pool.apply_async(_export_build_graph_result,
356
338
  args=(serializable_args, result),
357
339
  error_callback=err_call))
@@ -373,8 +355,8 @@ def _build_graph_steps(dump_steps_path, args):
373
355
  _build_graph_ranks(dump_ranks_path, args, step)
374
356
 
375
357
 
376
- def _compare_and_export_graph(graph_task_info, input_param, args, output_file_name):
377
- result = _run_graph_compare(graph_task_info, input_param, args, output_file_name)
358
+ def _compare_and_export_graph(graph_task_info, input_param, args):
359
+ result = _run_graph_compare(graph_task_info, input_param, args)
378
360
  return _export_compare_graph_result(args, result)
379
361
 
380
362
 
@@ -430,11 +412,8 @@ def _compare_graph_ranks_parallel(input_param, args, step=None):
430
412
  _build_graph_info(os.path.join(npu_path, f'rank{graph_n.root.rank}'), args, graph_n),
431
413
  _build_graph_info(os.path.join(bench_path, f'rank{graph_b.root.rank}'), args, graph_b),
432
414
  f'rank{graph_n.root.rank}', f'rank{graph_b.root.rank}', current_time)
433
- output_file_name = f'compare_{step}_merged{i}_{current_time}.vis' \
434
- if step else f'compare_merged{i}_{current_time}.vis'
435
415
  export_res_task_list.append(pool.apply_async(_compare_and_export_graph,
436
- args=(graph_task_info, input_param, serializable_args,
437
- output_file_name),
416
+ args=(graph_task_info, input_param, serializable_args),
438
417
  error_callback=err_call))
439
418
  export_res_list = [res.get() for res in export_res_task_list]
440
419
  if any(export_res_list):
@@ -455,8 +434,6 @@ def _graph_service_parser(parser):
455
434
  help="<Optional> whether open overflow_check for graph.", required=False)
456
435
  parser.add_argument("-f", "--fuzzy_match", dest="fuzzy_match", action="store_true",
457
436
  help="<Optional> Whether to perform a fuzzy match on the api name.", required=False)
458
- parser.add_argument("-cs", "--complete_stack", dest="complete_stack", action="store_true",
459
- help="<Optional> Whether to use complete stack information.", required=False)
460
437
 
461
438
 
462
439
  def _graph_service_command(args):
@@ -503,7 +480,8 @@ def _graph_service_command(args):
503
480
  else:
504
481
  logger.error("The npu_path or bench_path should be a folder.")
505
482
  raise CompareException(CompareException.INVALID_COMPARE_MODE)
506
- # 所有数据输出db结束后,添加索引,修改权限:post_process_db output_db_path
483
+ # 所有数据输出db结束后,添加索引,修改权限
484
+ post_process_db(output_db_path)
507
485
 
508
486
 
509
487
  def _pt_graph_service_parser(parser):
@@ -523,20 +501,18 @@ def _ms_graph_service_command(args):
523
501
 
524
502
 
525
503
  class CompareGraphResult:
526
- def __init__(self, graph_n, graph_b, graph_comparator, micro_steps, rank=0, step=0, output_file_name=''):
504
+ def __init__(self, graph_n, graph_b, graph_comparator, micro_steps, rank=0, step=0):
527
505
  self.graph_n = graph_n
528
506
  self.graph_b = graph_b
529
507
  self.graph_comparator = graph_comparator
530
508
  self.micro_steps = micro_steps
531
509
  self.rank = rank
532
510
  self.step = step
533
- self.output_file_name = output_file_name
534
511
 
535
512
 
536
513
  class BuildGraphResult:
537
- def __init__(self, graph, micro_steps=0, rank=0, step=0, output_file_name=''):
514
+ def __init__(self, graph, micro_steps=0, rank=0, step=0):
538
515
  self.graph = graph
539
516
  self.micro_steps = micro_steps
540
517
  self.rank = rank
541
518
  self.step = step
542
- self.output_file_name = output_file_name