PyPI - mindstudio-probe - Versions diffs - 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl - Mend

mindstudio-probe 8.1.2 py3-none-any.whl → 8.2.1 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (181)

{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/METADATA +2 -2
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/RECORD +172 -147
msprobe/README.md +6 -6
msprobe/core/common/const.py +98 -41
msprobe/core/common/db_manager.py +256 -0
msprobe/core/common/file_utils.py +28 -5
msprobe/core/common/log.py +7 -0
msprobe/core/common/megatron_utils.py +59 -0
msprobe/core/common/parallel_state.py +193 -0
msprobe/core/common/utils.py +20 -13
msprobe/core/common_config.py +5 -0
msprobe/core/compare/acc_compare.py +140 -93
msprobe/core/compare/check.py +13 -0
msprobe/core/compare/compare_cli.py +64 -6
msprobe/core/compare/config.py +10 -8
msprobe/core/compare/diff_analyze/diff_analyze_threshold.yaml +14 -0
msprobe/core/compare/diff_analyze/first_diff_analyze.py +135 -0
msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
msprobe/core/compare/find_first/__init__.py +0 -0
msprobe/core/compare/find_first/analyzer.py +282 -0
msprobe/core/compare/find_first/data_processor.py +35 -0
msprobe/core/compare/find_first/graph.py +188 -0
msprobe/core/compare/find_first/utils.py +189 -0
msprobe/core/compare/highlight.py +74 -101
msprobe/core/compare/layer_mapping/layer_mapping.py +14 -9
msprobe/core/compare/merge_result/merge_result.py +2 -2
msprobe/core/compare/multiprocessing_compute.py +45 -28
msprobe/core/compare/npy_compare.py +7 -10
msprobe/core/compare/utils.py +338 -130
msprobe/core/config_check/checkers/dataset_checker.py +2 -1
msprobe/core/config_check/checkers/env_args_checker.py +5 -5
msprobe/core/config_check/checkers/hyperparameter_checker.py +30 -10
msprobe/core/config_check/checkers/pip_checker.py +4 -3
msprobe/core/config_check/checkers/random_checker.py +3 -3
msprobe/core/config_check/checkers/weights_checker.py +2 -1
msprobe/core/config_check/ckpt_compare/megatron_loader.py +2 -0
msprobe/core/config_check/resource/hyperparameter.yaml +11 -1
msprobe/core/config_check/utils/hyperparameter_parser.py +7 -3
msprobe/core/config_check/utils/utils.py +10 -0
msprobe/core/data_dump/api_registry.py +49 -30
msprobe/core/data_dump/data_collector.py +71 -29
msprobe/core/data_dump/data_processor/base.py +2 -0
msprobe/core/data_dump/data_processor/mindspore_processor.py +47 -53
msprobe/core/data_dump/data_processor/pytorch_processor.py +227 -93
msprobe/core/data_dump/json_writer.py +81 -7
msprobe/core/data_dump/scope.py +4 -6
msprobe/core/hook_manager.py +129 -70
msprobe/core/monitor/csv2db.py +361 -0
msprobe/core/monitor/db_utils.py +278 -0
msprobe/core/monitor/utils.py +35 -1
msprobe/core/service.py +31 -39
msprobe/core/single_save/single_comparator.py +16 -3
msprobe/docs/01.installation.md +51 -19
msprobe/docs/02.config_introduction.md +16 -20
msprobe/docs/03.config_examples.md +26 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +6 -2
msprobe/docs/06.data_dump_MindSpore.md +44 -7
msprobe/docs/07.accuracy_checker_PyTorch.md +1 -1
msprobe/docs/10.accuracy_compare_PyTorch.md +124 -44
msprobe/docs/11.accuracy_compare_MindSpore.md +75 -7
msprobe/docs/14.data_parse_PyTorch.md +1 -1
msprobe/docs/19.monitor.md +94 -7
msprobe/docs/21.visualization_PyTorch.md +71 -101
msprobe/docs/22.visualization_MindSpore.md +69 -119
msprobe/docs/23.generate_operator_PyTorch.md +1 -1
msprobe/docs/25.tool_function_introduction.md +0 -1
msprobe/docs/26.data_dump_PyTorch_baseline.md +7 -7
msprobe/docs/28.debugger_save_instruction.md +184 -81
msprobe/docs/29.data_dump_MSAdapter.md +6 -0
msprobe/docs/31.config_check.md +4 -2
msprobe/docs/36.calculation_result_change.md +75 -0
msprobe/docs/FAQ.md +22 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +6 -2
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +59 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +80 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +330 -0
msprobe/mindspore/__init__.py +1 -1
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +1 -1
msprobe/mindspore/api_accuracy_checker/api_runner.py +9 -6
msprobe/mindspore/api_accuracy_checker/compute_element.py +18 -12
msprobe/mindspore/cell_processor.py +64 -25
msprobe/mindspore/common/utils.py +51 -7
msprobe/mindspore/compare/common_dir_compare.py +45 -37
msprobe/mindspore/compare/ms_compare.py +10 -2
msprobe/mindspore/compare/ms_graph_compare.py +47 -52
msprobe/mindspore/debugger/debugger_config.py +18 -7
msprobe/mindspore/debugger/precision_debugger.py +16 -12
msprobe/mindspore/dump/cell_dump_process.py +130 -68
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +10 -2
msprobe/mindspore/dump/graph_mode_cell_dump.py +35 -9
msprobe/mindspore/dump/graph_tensor_dump.py +11 -0
msprobe/mindspore/dump/hook_cell/api_register.py +19 -20
msprobe/mindspore/dump/hook_cell/hook_cell.py +12 -34
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +142 -21
msprobe/mindspore/dump/kernel_kbyk_dump.py +24 -0
msprobe/mindspore/exception_dump/__init__.py +0 -0
msprobe/mindspore/exception_dump/exception_dump_tool_factory.py +51 -0
msprobe/mindspore/exception_dump/kernel_graph_exception_dump.py +57 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +5 -4
msprobe/mindspore/mindspore_service.py +2 -2
msprobe/mindspore/mindtorch/mindtorch_adaptor.py +12 -7
msprobe/mindspore/monitor/features.py +82 -0
msprobe/mindspore/monitor/module_hook.py +168 -10
msprobe/mindspore/monitor/utils.py +27 -1
msprobe/mindspore/ms_config.py +12 -4
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +1 -1
msprobe/mindspore/task_handler_factory.py +3 -1
msprobe/nan_analyze/graph.py +1 -1
msprobe/pytorch/api_accuracy_checker/common/config.py +3 -36
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +0 -24
msprobe/pytorch/api_accuracy_checker/compare/compare.py +2 -12
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -6
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -2
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +12 -132
msprobe/pytorch/common/utils.py +1 -21
msprobe/pytorch/compare/pt_compare.py +10 -2
msprobe/pytorch/{hook_module/jit_script_wrapper.py → compare/pt_diff_analyze.py} +3 -15
msprobe/pytorch/compare/utils.py +2 -1
msprobe/pytorch/debugger/debugger_config.py +18 -23
msprobe/pytorch/dump/module_dump/hook_wrapper.py +10 -7
msprobe/pytorch/dump/module_dump/module_processer.py +41 -19
msprobe/pytorch/free_benchmark/main.py +7 -4
msprobe/pytorch/hook_module/api_register.py +62 -24
msprobe/pytorch/hook_module/hook_module.py +9 -29
msprobe/pytorch/hook_module/pt_hook_manager.py +84 -15
msprobe/pytorch/hook_module/script_wrapper.py +140 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +6 -0
msprobe/pytorch/monitor/csv2tb.py +1 -1
msprobe/pytorch/monitor/features.py +94 -0
msprobe/pytorch/monitor/module_hook.py +221 -81
msprobe/pytorch/monitor/module_metric.py +27 -1
msprobe/pytorch/monitor/optimizer_collect.py +109 -4
msprobe/pytorch/online_dispatch/dispatch.py +42 -24
msprobe/pytorch/online_dispatch/dump_compare.py +1 -1
msprobe/pytorch/parse_tool/lib/visualization.py +0 -1
msprobe/pytorch/pt_config.py +2 -51
msprobe/pytorch/pytorch_service.py +7 -14
msprobe/visualization/builder/graph_builder.py +192 -63
msprobe/visualization/builder/graph_merger.py +986 -0
msprobe/visualization/builder/msprobe_adapter.py +17 -15
msprobe/visualization/compare/graph_comparator.py +26 -16
msprobe/visualization/db_utils.py +252 -0
msprobe/visualization/graph/base_node.py +2 -22
msprobe/visualization/graph/distributed_analyzer.py +12 -12
msprobe/visualization/graph/graph.py +44 -16
msprobe/visualization/graph_service.py +143 -59
msprobe/visualization/utils.py +103 -4
msprobe/docs/08.accuracy_checker_online_PyTorch.md +0 -295
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +0 -205
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +0 -378
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +0 -239
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +0 -115
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +0 -250
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +0 -63
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +0 -198
msprobe/pytorch/attl_manager.py +0 -65
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/WHEEL +0 -0
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/top_level.txt +0 -0
msprobe/{pytorch/api_accuracy_checker/tensor_transport_layer → core/compare/diff_analyze}/__init__.py +0 -0

msprobe/visualization/graph_service.py CHANGED Viewed

@@ -22,7 +22,8 @@ from msprobe.core.common.file_utils import (check_file_type, create_directory, F
 from msprobe.core.common.const import FileCheckConst, Const
 from msprobe.core.common.utils import CompareException, get_dump_mode
 from msprobe.visualization.compare.graph_comparator import GraphComparator
-from msprobe.visualization.utils import GraphConst, check_directory_content, SerializableArgs
+from msprobe.visualization.utils import GraphConst, check_directory_content, SerializableArgs, load_parallel_param, \
+    sort_rank_number_strings, check_whether_parallel_merge, validate_parallel_param, get_step_or_rank_int
 from msprobe.visualization.builder.graph_builder import GraphBuilder, GraphExportConfig, GraphInfo, BuildGraphTaskInfo
 from msprobe.core.common.log import logger
 from msprobe.visualization.graph.node_colors import NodeColors
@@ -30,8 +31,12 @@ from msprobe.core.compare.layer_mapping import generate_api_mapping_by_layer_map
 from msprobe.core.compare.utils import check_and_return_dir_contents
 from msprobe.core.common.utils import detect_framework_by_dump_json
 from msprobe.visualization.graph.distributed_analyzer import DistributedAnalyzer
+from msprobe.visualization.builder.graph_merger import GraphMerger
+from msprobe.visualization.db_utils import post_process_db
 current_time = time.strftime("%Y%m%d%H%M%S")
+build_output_db_name = f'build_{current_time}.vis.db'
+compare_output_db_name = f'compare_{current_time}.vis.db'
 def _compare_graph(graph_n: GraphInfo, graph_b: GraphInfo, input_param, args):
@@ -83,32 +88,32 @@ def _export_compare_graph_result(args, result):
     graphs = [result.graph_n, result.graph_b]
     graph_comparator = result.graph_comparator
     micro_steps = result.micro_steps
-    output_file_name = result.output_file_name
-    if not output_file_name:
-        output_file_name = f'compare_{current_time}.vis'
-    logger.info(f'Start exporting compare graph result, file name: {output_file_name}...')
-    output_path = os.path.join(args.output_path, output_file_name)
+    logger.info(f'Start exporting compare graph result, file name: {compare_output_db_name}...')
+    output_db_path = os.path.join(args.output_path, compare_output_db_name)
     task = GraphConst.GRAPHCOMPARE_MODE_TO_DUMP_MODE_TO_MAPPING.get(graph_comparator.ma.compare_mode)
     export_config = GraphExportConfig(graphs[0], graphs[1], graph_comparator.ma.get_tool_tip(),
                                       NodeColors.get_node_colors(graph_comparator.ma.compare_mode), micro_steps, task,
-                                      args.overflow_check, graph_comparator.ma.compare_mode)
+                                      args.overflow_check, graph_comparator.ma.compare_mode, result.step, result.rank,
+                                      args.step_list if hasattr(args, 'step_list') else [0],
+                                      args.rank_list if hasattr(args, 'rank_list') else [0])
     try:
-        GraphBuilder.to_json(output_path, export_config)
-        logger.info(f'Exporting compare graph result successfully, the result file is saved in {output_path}')
+        GraphBuilder.to_db(output_db_path, export_config)
+        logger.info(f'Exporting compare graph result successfully, the result file is saved in {output_db_path}')
         return ''
     except RuntimeError as e:
-        logger.error(f'Failed to export compare graph result, file: {output_file_name}, error: {e}')
-        return output_file_name
+        logger.error(f'Failed to export compare graph result, file: {compare_output_db_name}, error: {e}')
+        return compare_output_db_name
-def _build_graph_info(dump_path, args):
+def _build_graph_info(dump_path, args, graph=None):
     construct_path = FileChecker(os.path.join(dump_path, GraphConst.CONSTRUCT_FILE), FileCheckConst.FILE,
                                  FileCheckConst.READ_ABLE).common_check()
     data_path = FileChecker(os.path.join(dump_path, GraphConst.DUMP_FILE), FileCheckConst.FILE,
                             FileCheckConst.READ_ABLE).common_check()
     stack_path = FileChecker(os.path.join(dump_path, GraphConst.STACK_FILE), FileCheckConst.FILE,
                              FileCheckConst.READ_ABLE).common_check()
-    graph = GraphBuilder.build(construct_path, data_path, stack_path, complete_stack=args.complete_stack)
+    if not graph:
+        graph = GraphBuilder.build(construct_path, data_path, stack_path)
     return GraphInfo(graph, construct_path, data_path, stack_path)
@@ -134,20 +139,14 @@ def _run_build_graph_compare(input_param, args, nr, br):
 def _run_build_graph_single(dump_ranks_path, rank, step, args):
     logger.info(f'Start building graph for {rank}...')
     dump_path = os.path.join(dump_ranks_path, rank)
-    output_file_name = f'build_{step}_{rank}_{current_time}.vis' if step else f'build_{rank}_{current_time}.vis'
     result = _build_graph_result(dump_path, args)
-    result.output_file_name = output_file_name
     if rank != Const.RANK:
-        try:
-            result.rank = int(rank.replace(Const.RANK, ""))
-        except Exception as e:
-            logger.error('The folder name format is incorrect, expected rank+number.')
-            raise CompareException(CompareException.INVALID_PATH_ERROR) from e
+        result.rank = get_step_or_rank_int(rank, True)
     logger.info(f'Building graph for step: {step}, rank: {rank} finished.')
     return result
-def _run_graph_compare(graph_task_info, input_param, args, output_file_name):
+def _run_graph_compare(graph_task_info, input_param, args):
     logger.info(f'Start comparing data for {graph_task_info.npu_rank}...')
     graph_n = graph_task_info.graph_info_n
     graph_b = graph_task_info.graph_info_b
@@ -159,13 +158,8 @@ def _run_graph_compare(graph_task_info, input_param, args, output_file_name):
         graph_n.graph.overflow_check()
         graph_b.graph.overflow_check()
     graph_result = CompareGraphResult(graph_n.graph, graph_b.graph, graph_comparator, micro_steps)
-    graph_result.output_file_name = output_file_name
     if nr != Const.RANK:
-        try:
-            graph_result.rank = int(nr.replace(Const.RANK, ""))
-        except Exception as e:
-            logger.error('The folder name format is incorrect, expected rank+number.')
-            raise CompareException(CompareException.INVALID_PATH_ERROR) from e
+        graph_result.rank = get_step_or_rank_int(nr, True)
     logger.info(f'Comparing data for {graph_task_info.npu_rank} finished.')
     return graph_result
@@ -175,19 +169,18 @@ def _export_build_graph_result(args, result):
     graph = result.graph
     micro_steps = result.micro_steps
     overflow_check = args.overflow_check
-    output_file_name = result.output_file_name
-    if not output_file_name:
-        output_file_name = f'build_{current_time}.vis'
-    logger.info(f'Start exporting graph for {output_file_name}...')
-    output_path = os.path.join(out_path, output_file_name)
+    logger.info(f'Start exporting graph for {build_output_db_name}...')
+    output_db_path = os.path.join(out_path, build_output_db_name)
+    config = GraphExportConfig(graph, micro_steps=micro_steps, overflow_check=overflow_check, rank=result.rank,
+                               step=result.step, rank_list=args.rank_list if hasattr(args, 'rank_list') else [0],
+                               step_list=args.step_list if hasattr(args, 'step_list') else [0])
     try:
-        GraphBuilder.to_json(output_path, GraphExportConfig(graph, micro_steps=micro_steps,
-                                                            overflow_check=overflow_check))
-        logger.info(f'Model graph exported successfully, the result file is saved in {output_path}')
+        GraphBuilder.to_db(output_db_path, config)
+        logger.info(f'Model graph exported successfully, the result file is saved in {output_db_path}')
         return None
     except RuntimeError as e:
-        logger.error(f'Failed to export model graph, file: {output_file_name}, error: {e}')
-        return output_file_name
+        logger.error(f'Failed to export model graph, file: {build_output_db_name}, error: {e}')
+        return build_output_db_name
 def is_real_data_compare(input_param, npu_ranks, bench_ranks):
@@ -205,9 +198,9 @@ def is_real_data_compare(input_param, npu_ranks, bench_ranks):
     return has_real_data
-def _mp_compare(input_param, serializable_args, output_file_name, nr, br):
+def _mp_compare(input_param, serializable_args, nr, br):
     graph_task_info = _run_build_graph_compare(input_param, serializable_args, nr, br)
-    return _run_graph_compare(graph_task_info, input_param, serializable_args, output_file_name)
+    return _run_graph_compare(graph_task_info, input_param, serializable_args)
 def _compare_graph_ranks(input_param, args, step=None):
@@ -223,6 +216,8 @@ def _compare_graph_ranks(input_param, args, step=None):
         # 暂存所有rank的graph，用于匹配rank间的分布式节点
         compare_graph_results = _get_compare_graph_results(input_param, serializable_args, step, pool, err_call)
+        serializable_args.rank_list = [result.rank for result in compare_graph_results]
         # 匹配rank间的分布式节点
         if len(compare_graph_results) > 1:
             DistributedAnalyzer({obj.rank: obj.graph_n for obj in compare_graph_results},
@@ -258,27 +253,28 @@ def _get_compare_graph_results(input_param, serializable_args, step, pool, err_c
         for nr, br in zip(npu_ranks, bench_ranks):
             input_param['npu_path'] = os.path.join(dump_rank_n, nr)
             input_param['bench_path'] = os.path.join(dump_rank_b, br)
-            output_file_name = f'compare_{step}_{nr}_{current_time}.vis' if step else f'compare_{nr}_{current_time}.vis'
+            build_key = f'{step}_{nr}' if step else f'{nr}'
             input_param_copy = deepcopy(input_param)
-            mp_task_dict[output_file_name] = pool.apply_async(_run_build_graph_compare,
-                                                              args=(input_param_copy, serializable_args, nr, br),
-                                                              error_callback=err_call)
+            mp_task_dict[build_key] = pool.apply_async(_run_build_graph_compare,
+                                                       args=(input_param_copy, serializable_args, nr, br),
+                                                       error_callback=err_call)
         mp_res_dict = {k: v.get() for k, v in mp_task_dict.items()}
-        for output_file_name, mp_res in mp_res_dict.items():
-            compare_graph_results.append(_run_graph_compare(mp_res, input_param, serializable_args, output_file_name))
+        for mp_res in mp_res_dict.values():
+            compare_graph_results.append(_run_graph_compare(mp_res, input_param, serializable_args))
     else:
         compare_graph_tasks = []
         for nr, br in zip(npu_ranks, bench_ranks):
             input_param['npu_path'] = os.path.join(dump_rank_n, nr)
             input_param['bench_path'] = os.path.join(dump_rank_b, br)
-            output_file_name = f'compare_{step}_{nr}_{current_time}.vis' if step else f'compare_{nr}_{current_time}.vis'
             input_param_copy = deepcopy(input_param)
             compare_graph_tasks.append(pool.apply_async(_mp_compare,
-                                                        args=(input_param_copy, serializable_args, output_file_name, nr,
-                                                              br),
+                                                        args=(input_param_copy, serializable_args, nr, br),
                                                         error_callback=err_call))
         compare_graph_results = [task.get() for task in compare_graph_tasks]
+    if step is not None:
+        for result in compare_graph_results:
+            result.step = get_step_or_rank_int(step)
     return compare_graph_results
@@ -293,16 +289,19 @@ def _compare_graph_steps(input_param, args):
         logger.error('The number of steps in the two runs is different. Unable to match the steps.')
         raise CompareException(CompareException.INVALID_PATH_ERROR)
+    args.step_list = sorted([get_step_or_rank_int(step) for step in npu_steps])
     for folder_step in npu_steps:
         logger.info(f'Start processing data for {folder_step}...')
         input_param['npu_path'] = os.path.join(dump_step_n, folder_step)
         input_param['bench_path'] = os.path.join(dump_step_b, folder_step)
-        _compare_graph_ranks(input_param, args, step=folder_step)
+        _compare_graph_ranks(input_param, args, step=folder_step) if not args.parallel_merge \
+            else _compare_graph_ranks_parallel(input_param, args, step=folder_step)
 def _build_graph_ranks(dump_ranks_path, args, step=None):
-    ranks = sorted(check_and_return_dir_contents(dump_ranks_path, Const.RANK))
+    ranks = sort_rank_number_strings(check_and_return_dir_contents(dump_ranks_path, Const.RANK))
     serializable_args = SerializableArgs(args)
     with Pool(processes=max(int((cpu_count() + 1) // 4), 1)) as pool:
         def err_call(err):
@@ -319,12 +318,21 @@ def _build_graph_ranks(dump_ranks_path, args, step=None):
                                                       error_callback=err_call))
         build_graph_results = [task.get() for task in build_graph_tasks]
-        if len(build_graph_results) > 1:
+        if step is not None:
+            for result in build_graph_results:
+                result.step = get_step_or_rank_int(step)
+        if args.parallel_params:
+            validate_parallel_param(args.parallel_params[0], dump_ranks_path)
+            build_graph_results = GraphMerger(build_graph_results, args.parallel_params[0]).merge_graph()
+        if len(build_graph_results) > 1 and not args.parallel_merge:
             DistributedAnalyzer({obj.rank: obj.graph for obj in build_graph_results},
                                 args.overflow_check).distributed_match()
         create_directory(args.output_path)
         export_build_graph_tasks = []
+        serializable_args.rank_list = [result.rank for result in build_graph_results]
         for result in build_graph_results:
             export_build_graph_tasks.append(pool.apply_async(_export_build_graph_result,
                                                              args=(serializable_args, result),
@@ -337,15 +345,84 @@ def _build_graph_ranks(dump_ranks_path, args, step=None):
             logger.info(f'Successfully exported build graph results.')
 def _build_graph_steps(dump_steps_path, args):
     steps = sorted(check_and_return_dir_contents(dump_steps_path, Const.STEP))
+    args.step_list = sorted([get_step_or_rank_int(step) for step in steps])
     for step in steps:
         logger.info(f'Start processing data for {step}...')
         dump_ranks_path = os.path.join(dump_steps_path, step)
         _build_graph_ranks(dump_ranks_path, args, step)
+def _compare_and_export_graph(graph_task_info, input_param, args):
+    result = _run_graph_compare(graph_task_info, input_param, args)
+    return _export_compare_graph_result(args, result)
+def _compare_graph_ranks_parallel(input_param, args, step=None):
+    args.fuzzy_match = True
+    npu_path = input_param.get('npu_path')
+    bench_path = input_param.get('bench_path')
+    ranks_n = sort_rank_number_strings(check_and_return_dir_contents(npu_path, Const.RANK))
+    ranks_b = sort_rank_number_strings(check_and_return_dir_contents(bench_path, Const.RANK))
+    parallel_params = load_parallel_param(input_param)
+    if len(parallel_params) != 2:
+        raise RuntimeError('Parallel params error in compare graph!')
+    validate_parallel_param(parallel_params[0], npu_path)
+    validate_parallel_param(parallel_params[1], bench_path, '[Bench]')
+    serializable_args = SerializableArgs(args)
+    with Pool(processes=max(int((cpu_count() + 1) // 4), 1)) as pool:
+        def err_call(err):
+            logger.error(f'Error occurred while comparing graph ranks: {err}')
+            try:
+                pool.close()
+            except OSError as e:
+                logger.error(f'Error occurred while terminating the pool: {e}')
+        # 1.并行构图
+        build_graph_tasks_n = []
+        build_graph_tasks_b = []
+        for rank in ranks_n:
+            build_graph_tasks_n.append(pool.apply_async(_run_build_graph_single,
+                                                        args=(npu_path, rank, step, serializable_args),
+                                                        error_callback=err_call))
+        for rank in ranks_b:
+            build_graph_tasks_b.append(pool.apply_async(_run_build_graph_single,
+                                                        args=(bench_path, rank, step, serializable_args),
+                                                        error_callback=err_call))
+        graph_results_n = [task.get() for task in build_graph_tasks_n]
+        graph_results_b = [task.get() for task in build_graph_tasks_b]
+        # 2.图合并
+        build_graph_results_n = GraphMerger(graph_results_n, parallel_params[0]).merge_graph()
+        build_graph_results_b = GraphMerger(graph_results_b, parallel_params[1], True).merge_graph()
+        if len(build_graph_results_n) != len(build_graph_results_b):
+            raise RuntimeError(f'Parallel merge failed because the dp of npu: {len(build_graph_results_n)} '
+                               f'is inconsistent with that of bench: {len(build_graph_results_b)}!')
+        serializable_args.rank_list = [result.rank for result in build_graph_results_n]
+        # 3.并行图比对和输出
+        export_res_task_list = []
+        create_directory(args.output_path)
+        for i, result_n in enumerate(build_graph_results_n):
+            graph_n = result_n.graph
+            graph_b = build_graph_results_b[i].graph
+            graph_task_info = BuildGraphTaskInfo(
+                _build_graph_info(os.path.join(npu_path, f'rank{graph_n.root.rank}'), args, graph_n),
+                _build_graph_info(os.path.join(bench_path, f'rank{graph_b.root.rank}'), args, graph_b),
+                f'rank{graph_n.root.rank}', f'rank{graph_b.root.rank}', current_time)
+            export_res_task_list.append(pool.apply_async(_compare_and_export_graph,
+                                                         args=(graph_task_info, input_param, serializable_args),
+                                                         error_callback=err_call))
+        export_res_list = [res.get() for res in export_res_task_list]
+        if any(export_res_list):
+            failed_names = list(filter(lambda x: x, export_res_list))
+            logger.error(f'Unable to export compare graph results: {", ".join(failed_names)}.')
+        else:
+            logger.info('Successfully exported compare graph results.')
 def _graph_service_parser(parser):
     parser.add_argument("-i", "--input_path", dest="input_path", type=str,
                         help="<Required> The compare input path, a dict json.", required=True)
@@ -357,19 +434,20 @@ def _graph_service_parser(parser):
                         help="<Optional> whether open overflow_check for graph.", required=False)
     parser.add_argument("-f", "--fuzzy_match", dest="fuzzy_match", action="store_true",
                         help="<Optional> Whether to perform a fuzzy match on the api name.", required=False)
-    parser.add_argument("-cs", "--complete_stack", dest="complete_stack", action="store_true",
-                        help="<Optional> Whether to use complete stack information.", required=False)
 def _graph_service_command(args):
     # Graph service entry point. Calls load_json(args.input_path), reads "npu_path" and
     # "bench_path" from the result, stores the parallel-merge settings on args, and then
     # dispatches on directory content: only npu_path given -> build branch, both given as
     # directories -> compare branch, anything else -> error log + CompareException.
     # Finally post_process_db is called on the output db path.
     # Parts of the body are elided by the diff hunk headers (the "@@" lines) below.
     # NOTE(review): output_db_path is only assigned in the two directory branches visible
     # here; confirm every non-raising branch in the elided hunks assigns it as well.
     input_param = load_json(args.input_path)
     npu_path = input_param.get("npu_path")
     bench_path = input_param.get("bench_path")
+    args.parallel_merge = check_whether_parallel_merge(input_param)
+    args.parallel_params = load_parallel_param(input_param) if args.parallel_merge else None
     check_file_or_directory_path(npu_path, isdir=True)
     if bench_path:
         check_file_or_directory_path(bench_path, isdir=True)
     if check_file_type(npu_path) == FileCheckConst.DIR and not bench_path:
         content = check_directory_content(npu_path)
+        output_db_path = os.path.join(args.output_path, build_output_db_name)
         if content == GraphConst.RANKS:
             _build_graph_ranks(npu_path, args)
         elif content == GraphConst.STEPS:
@@ -383,10 +461,14 @@ def _graph_service_command(args):
     elif check_file_type(npu_path) == FileCheckConst.DIR and check_file_type(bench_path) == FileCheckConst.DIR:
         content_n = check_directory_content(npu_path)
         content_b = check_directory_content(bench_path)
+        output_db_path = os.path.join(args.output_path, compare_output_db_name)
         if content_n != content_b:
             raise ValueError('The directory structures of npu_path and bench_path are inconsistent.')
         if content_n == GraphConst.RANKS:
-            _compare_graph_ranks(input_param, args)
+            if args.parallel_merge:
+                _compare_graph_ranks_parallel(input_param, args)
+            else:
+                _compare_graph_ranks(input_param, args)
         elif content_n == GraphConst.STEPS:
             _compare_graph_steps(input_param, args)
         else:
@@ -398,6 +480,8 @@ def _graph_service_command(args):
     else:
         logger.error("The npu_path or bench_path should be a folder.")
         raise CompareException(CompareException.INVALID_COMPARE_MODE)
+    # After all data has been written to the db, add indexes and change permissions
+    post_process_db(output_db_path)
 def _pt_graph_service_parser(parser):
@@ -417,18 +501,18 @@ def _ms_graph_service_command(args):
 class CompareGraphResult:
     # Plain container: the constructor stores its arguments unchanged as attributes
     # (graph_n, graph_b, graph_comparator, micro_steps, rank, step).
     # In this revision the `output_file_name` parameter/attribute is replaced by `step`
     # (default 0).
     # presumably graph_n / graph_b are the NPU-side and bench-side graphs; verify against callers
-    def __init__(self, graph_n, graph_b, graph_comparator, micro_steps, rank=0, output_file_name=''):
+    def __init__(self, graph_n, graph_b, graph_comparator, micro_steps, rank=0, step=0):
         self.graph_n = graph_n
         self.graph_b = graph_b
         self.graph_comparator = graph_comparator
         self.micro_steps = micro_steps
         self.rank = rank
-        self.output_file_name = output_file_name
+        self.step = step
 class BuildGraphResult:
     # Plain container: the constructor stores its arguments unchanged as attributes
     # (graph, micro_steps, rank, step).
     # In this revision `micro_steps` gains a default of 0 and the `output_file_name`
     # parameter/attribute is replaced by `step` (default 0).
-    def __init__(self, graph, micro_steps, rank=0, output_file_name=''):
+    def __init__(self, graph, micro_steps=0, rank=0, step=0):
         self.graph = graph
         self.micro_steps = micro_steps
         self.rank = rank
-        self.output_file_name = output_file_name
+        self.step = step

msprobe/visualization/utils.py CHANGED Viewed

@@ -20,6 +20,8 @@ import pickle
 from msprobe.core.common.file_utils import FileOpen
 from msprobe.core.common.const import CompareConst, Const
 from msprobe.core.common.log import logger
+from msprobe.core.common.exceptions import MsprobeException
+from msprobe.core.compare.utils import check_and_return_dir_contents
 def load_json_file(file_path):
@@ -57,6 +59,21 @@ def str2float(percentage_str):
         return 0
+def get_step_or_rank_int(x: str, is_rank=False):
+    """
+    Return the integer part of a ``rank{int}`` or ``step{int}`` string.
+    If ``x`` is exactly the bare rank or step keyword, 0 is returned.
+    ``is_rank`` selects which keyword (rank or step) is removed before the
+    remainder is converted with ``int``. Any failure during that conversion
+    is logged and re-raised as ``RuntimeError`` chained to the original error.
+    """
+    if x in (Const.RANK, Const.STEP):
+        return 0
+    keyword = Const.RANK if is_rank else Const.STEP
+    try:
+        number = int(x.replace(keyword, ""))
+    except Exception as err:
+        logger.error(f'The folder name format is incorrect, expected {keyword}+number, such as rank0, step1, etc.')
+        raise RuntimeError from err
+    return number
 def check_directory_content(input_path):
     """
     检查input_path内容, 是否全是step{数字}命名的文件夹(例如step0), 或者全是rank{数字}命名的文件夹(例如rank0), 或者全是文件
@@ -102,6 +119,83 @@ def check_directory_content(input_path):
                      "all rank{number} named folders (such as rank0), or all files.")
+def extract_rank_number(rank_str):
+    """
+    Return the integer formed by everything after the first four characters of
+    ``rank_str`` (e.g. ``"rank12"`` -> 12), or 0 when that remainder is not a
+    valid integer literal.
+    """
+    suffix = rank_str[4:]
+    try:
+        number = int(suffix)
+    except ValueError:
+        number = 0
+    return number
+def sort_rank_number_strings(rank_number_strings):
+    """
+    Return a new list with the given strings ordered by the value
+    ``extract_rank_number`` yields for each of them. The input is not modified.
+    """
+    ordered = list(rank_number_strings)
+    ordered.sort(key=extract_rank_number)
+    return ordered
+def check_whether_parallel_merge(input_param):
+    """
+    Tell whether parallel merge is configured in ``input_param``.
+    Returns True only when ``input_param["parallel_merge"]`` is a non-empty
+    dict whose ``'npu'`` entry is truthy; otherwise returns False.
+    """
+    merge_config = input_param.get("parallel_merge")
+    is_filled_dict = isinstance(merge_config, dict) and bool(merge_config)
+    return bool(is_filled_dict and merge_config.get('npu'))
+def load_parallel_param(input_param):
+    """
+    Build ParallelParam objects from the "parallel_merge" section of ``input_param``.
+    Reads the ``'npu'`` and ``'bench'`` sub-configs; for each one the keys
+    ``rank_size``, ``tp`` and ``pp`` are passed through as-is (None when absent),
+    ``vpp`` defaults to 1 and ``order`` defaults to ``'tp-cp-ep-dp-pp'``.
+    Returns:
+        ``(param_n,)`` when no bench config is given, otherwise ``(param_n, param_b)``.
+    """
+    # dict.get(key, {}) still returns None when the key is present but null in the
+    # JSON, which made the following .get() calls fail; "or {}" treats null like absent.
+    parallel_merge = input_param.get("parallel_merge") or {}
+    config_n = parallel_merge.get('npu') or {}
+    config_b = parallel_merge.get('bench') or {}
+    def _to_param(config):
+        # One place for the key names and defaults shared by the npu and bench sides.
+        return ParallelParam(config.get('rank_size'), config.get('tp'), config.get('pp'), config.get('vpp', 1),
+                             config.get('order', 'tp-cp-ep-dp-pp'))
+    param_n = _to_param(config_n)
+    # The bench-side object is only built when a bench config actually exists.
+    return (param_n, _to_param(config_b)) if config_b else (param_n,)
+def validate_parallel_param(parallel_param, dump_path, log_prefix='[NPU]'):
+    """
+    Validate ``parallel_param`` against itself and against the rank folders in ``dump_path``.
+    Checks, in order: tp/pp/rank_size are not None; rank_size equals the number of
+    entries returned by ``check_and_return_dir_contents(dump_path, Const.RANK)``;
+    tp/pp/vpp/rank_size are greater than 0; tp and pp do not exceed rank_size and
+    divide it; tp * pp does not exceed rank_size; vpp > 1 requires pp >= 2;
+    ``order`` is a string.
+    Args:
+        parallel_param: object exposing rank_size, tp, pp, vpp and order attributes.
+        dump_path: directory whose rank folders are counted.
+        log_prefix: text prepended to every error log line.
+    Raises:
+        MsprobeException: with INVALID_PARAM_ERROR as soon as one check fails.
+    """
+    params = [parallel_param.tp, parallel_param.pp, parallel_param.rank_size]
+    ranks = check_and_return_dir_contents(dump_path, Const.RANK)
+    # Null check runs first so a missing value is reported as missing rather than
+    # as a rank_size mismatch ("you set None but expected N").
+    if any(x is None for x in params):
+        logger.error(f'{log_prefix} The parallel params "tp/pp/rank_size" must not be null!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    if len(ranks) != parallel_param.rank_size:
+        logger.error(f'{log_prefix} The parallel param "rank_size" error, '
+                     f'you set {parallel_param.rank_size} but expected {len(ranks)}.')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    # vpp is included here, as the error message already states; previously it was
+    # never checked and a null vpp raised a raw TypeError further down.
+    if parallel_param.vpp is None or any(x <= 0 for x in params + [parallel_param.vpp]):
+        logger.error(f'{log_prefix} The parallel params "tp/pp/vpp/rank_size" must be greater than 0!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    if parallel_param.tp > parallel_param.rank_size:
+        logger.error(f'{log_prefix} The parallel param "tp" must be less than or equal to "rank_size"!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    if parallel_param.pp > parallel_param.rank_size:
+        logger.error(f'{log_prefix} The parallel param "pp" must be less than or equal to "rank_size"!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    if parallel_param.rank_size % parallel_param.tp != 0:
+        logger.error(f'{log_prefix} The parallel param "rank_size" must be divisible by "tp"!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    if parallel_param.rank_size % parallel_param.pp != 0:
+        logger.error(f'{log_prefix} The parallel param "rank_size" must be divisible by "pp"!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    if parallel_param.tp * parallel_param.pp > parallel_param.rank_size:
+        logger.error(f'{log_prefix} The parallel params "tp * pp" must be less than or equal to "rank_size"!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    if parallel_param.vpp > 1 and parallel_param.pp < 2:
+        logger.error(f'{log_prefix} When configuring the parallel param "vpp", the "pp" param must be greater than 1!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+    if not isinstance(parallel_param.order, str):
+        logger.error(f'{log_prefix} The parallel params "order" must be of string type!')
+        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)
+class ParallelParam:
+    """
+    Plain holder for one set of parallel settings: rank_size, tp, pp, vpp
+    (default 1) and order (default 'tp-cp-ep-dp-pp'). Values are stored as
+    given; no validation happens here.
+    """
+    def __init__(self, rank_size, tp, pp, vpp=1, order='tp-cp-ep-dp-pp'):
+        self.rank_size, self.tp, self.pp = rank_size, tp, pp
+        self.vpp, self.order = vpp, order
 class ToolTip:
     MAX_DIFF = 'NPU与标杆API统计信息比对，最大值的差值'
     MIN_DIFF = 'NPU与标杆API统计信息比对，最小值的差值'
@@ -147,10 +241,11 @@ class GraphConst:
     INPUT = '.input.'
     OUTPUT = '.output.'
     STR_MAX_LEN = 50
-    MD5_INDEX_LIST = [CompareConst.RESULT]
-    REAL_DATA_INDEX_LIST = CompareConst.ALL_COMPARE_INDEX
-    SUMMARY_INDEX_LIST = CompareConst.SUMMARY_COMPARE_INDEX
+    MD5_INDEX_LIST = CompareConst.MD5_COMPARE_INDEX + [CompareConst.REQ_GRAD_CONSIST]
+    REAL_DATA_INDEX_LIST = CompareConst.ALL_COMPARE_INDEX + [CompareConst.REQ_GRAD_CONSIST]
+    SUMMARY_INDEX_LIST = CompareConst.SUMMARY_COMPARE_INDEX + [CompareConst.REQ_GRAD_CONSIST]
     APIS_BETWEEN_MODULES = 'Apis_Between_Modules'
+    APIS_BETWEEN_MODULES_ALL_RANKS = 'Apis_Between_Modules_All_Ranks'
     NULL = 'null'
     NONE = 'None'
     VALUE = 'value'
@@ -184,9 +279,13 @@ class GraphConst:
     OP = 'op'
     PEER = 'peer'
     GROUP_ID = 'group_id'
+    UNCERTAINTY_THRESHOLD = 1e-6
+    REDUCE_OPERATIONS = ['reduce_scatter', 'all_reduce']
     IGNORE_PRECISION_INDEX = {'empty', 'empty_like', 'empty_with_format', 'new_empty_strided', 'new_empty',
                               'empty_strided'}
+    VPP_CHUNK_0 = '0'
 def is_serializable(obj):

mindstudio-probe 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl

mindstudio-probe 8.1.2py3-none-any.whl → 8.2.1py3-none-any.whl