PyPI - mindstudio-probe - Versions diffs - 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.2 (py3-none-any.whl) → 8.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (261)

{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/METADATA +4 -3
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/RECORD +243 -191
msprobe/README.md +57 -21
msprobe/core/__init__.py +17 -0
msprobe/core/common/const.py +224 -82
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +5 -3
msprobe/core/common/file_utils.py +274 -40
msprobe/core/common/framework_adapter.py +169 -0
msprobe/core/common/global_lock.py +86 -0
msprobe/core/common/runtime.py +25 -0
msprobe/core/common/utils.py +148 -72
msprobe/core/common_config.py +7 -0
msprobe/core/compare/acc_compare.py +640 -462
msprobe/core/compare/check.py +36 -107
msprobe/core/compare/compare_cli.py +4 -0
msprobe/core/compare/config.py +72 -0
msprobe/core/compare/highlight.py +217 -215
msprobe/core/compare/layer_mapping/layer_mapping.py +4 -1
msprobe/core/compare/merge_result/merge_result.py +12 -6
msprobe/core/compare/multiprocessing_compute.py +227 -107
msprobe/core/compare/npy_compare.py +32 -16
msprobe/core/compare/utils.py +218 -244
msprobe/{mindspore/runtime.py → core/config_check/__init__.py} +2 -4
msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
msprobe/core/config_check/checkers/base_checker.py +60 -0
msprobe/core/config_check/checkers/dataset_checker.py +138 -0
msprobe/core/config_check/checkers/env_args_checker.py +96 -0
msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
msprobe/core/config_check/checkers/pip_checker.py +90 -0
msprobe/core/config_check/checkers/random_checker.py +367 -0
msprobe/core/config_check/checkers/weights_checker.py +147 -0
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
msprobe/core/config_check/config_check_cli.py +51 -0
msprobe/core/config_check/config_checker.py +100 -0
msprobe/{pytorch/parse.py → core/config_check/resource/dependency.yaml} +7 -4
msprobe/core/config_check/resource/env.yaml +57 -0
msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
msprobe/core/config_check/utils/utils.py +107 -0
msprobe/core/data_dump/api_registry.py +239 -0
msprobe/core/data_dump/data_collector.py +36 -9
msprobe/core/data_dump/data_processor/base.py +74 -53
msprobe/core/data_dump/data_processor/mindspore_processor.py +119 -78
msprobe/core/data_dump/data_processor/pytorch_processor.py +134 -96
msprobe/core/data_dump/json_writer.py +146 -57
msprobe/core/debugger/precision_debugger.py +143 -0
msprobe/core/grad_probe/constant.py +2 -1
msprobe/core/grad_probe/grad_compare.py +2 -2
msprobe/core/grad_probe/utils.py +1 -1
msprobe/core/hook_manager.py +242 -0
msprobe/core/monitor/anomaly_processor.py +384 -0
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/core/service.py +356 -0
msprobe/core/single_save/__init__.py +0 -0
msprobe/core/single_save/single_comparator.py +243 -0
msprobe/core/single_save/single_saver.py +157 -0
msprobe/docs/01.installation.md +6 -5
msprobe/docs/02.config_introduction.md +89 -30
msprobe/docs/03.config_examples.md +1 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +184 -50
msprobe/docs/06.data_dump_MindSpore.md +193 -28
msprobe/docs/07.accuracy_checker_PyTorch.md +13 -3
msprobe/docs/08.accuracy_checker_online_PyTorch.md +72 -10
msprobe/docs/09.accuracy_checker_MindSpore.md +19 -7
msprobe/docs/10.accuracy_compare_PyTorch.md +266 -102
msprobe/docs/11.accuracy_compare_MindSpore.md +117 -43
msprobe/docs/12.overflow_check_PyTorch.md +5 -3
msprobe/docs/13.overflow_check_MindSpore.md +6 -4
msprobe/docs/14.data_parse_PyTorch.md +4 -10
msprobe/docs/17.grad_probe.md +2 -1
msprobe/docs/18.online_dispatch.md +3 -3
msprobe/docs/19.monitor.md +211 -103
msprobe/docs/21.visualization_PyTorch.md +100 -28
msprobe/docs/22.visualization_MindSpore.md +103 -31
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/25.tool_function_introduction.md +23 -22
msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
msprobe/docs/27.dump_json_instruction.md +278 -8
msprobe/docs/28.debugger_save_instruction.md +111 -20
msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/31.config_check.md +95 -0
msprobe/docs/32.ckpt_compare.md +69 -0
msprobe/docs/33.generate_operator_MindSpore.md +190 -0
msprobe/docs/34.RL_collect.md +92 -0
msprobe/docs/35.nan_analyze.md +72 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +3 -3
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -55
msprobe/mindspore/api_accuracy_checker/api_runner.py +25 -11
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +580 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
msprobe/mindspore/api_accuracy_checker/data_manager.py +4 -3
msprobe/mindspore/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +451 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +11 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/cell_processor.py +204 -33
msprobe/mindspore/code_mapping/graph_parser.py +4 -21
msprobe/mindspore/common/const.py +73 -2
msprobe/mindspore/common/utils.py +157 -29
msprobe/mindspore/compare/common_dir_compare.py +382 -0
msprobe/mindspore/compare/distributed_compare.py +2 -26
msprobe/mindspore/compare/ms_compare.py +18 -398
msprobe/mindspore/compare/ms_graph_compare.py +20 -10
msprobe/mindspore/compare/utils.py +37 -0
msprobe/mindspore/debugger/debugger_config.py +59 -7
msprobe/mindspore/debugger/precision_debugger.py +83 -90
msprobe/mindspore/dump/cell_dump_process.py +902 -0
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +889 -0
msprobe/mindspore/dump/dump_tool_factory.py +18 -8
msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
msprobe/mindspore/dump/hook_cell/api_register.py +176 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +22 -12
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +42 -26
msprobe/mindspore/dump/jit_dump.py +35 -27
msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -16
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +22 -12
msprobe/mindspore/free_benchmark/common/utils.py +1 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +9 -2
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/mindspore_service.py +111 -0
msprobe/mindspore/monitor/common_func.py +52 -0
msprobe/mindspore/monitor/data_writers.py +237 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
msprobe/mindspore/monitor/features.py +13 -1
msprobe/mindspore/monitor/module_hook.py +568 -444
msprobe/mindspore/monitor/optimizer_collect.py +331 -0
msprobe/mindspore/monitor/utils.py +71 -9
msprobe/mindspore/ms_config.py +16 -15
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/task_handler_factory.py +5 -2
msprobe/msprobe.py +19 -0
msprobe/nan_analyze/__init__.py +14 -0
msprobe/nan_analyze/analyzer.py +255 -0
msprobe/nan_analyze/graph.py +189 -0
msprobe/nan_analyze/utils.py +211 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +15 -13
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +206 -4
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +9 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +6 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +31 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -20
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +154 -0
msprobe/pytorch/attl_manager.py +65 -0
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
msprobe/pytorch/common/utils.py +53 -19
msprobe/pytorch/compare/distributed_compare.py +4 -36
msprobe/pytorch/compare/pt_compare.py +13 -84
msprobe/pytorch/compare/utils.py +47 -0
msprobe/pytorch/debugger/debugger_config.py +34 -17
msprobe/pytorch/debugger/precision_debugger.py +50 -96
msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
msprobe/pytorch/dump/module_dump/module_dump.py +15 -61
msprobe/pytorch/dump/module_dump/module_processer.py +150 -114
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +1 -1
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
msprobe/pytorch/hook_module/api_register.py +155 -0
msprobe/pytorch/hook_module/hook_module.py +18 -22
msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +193 -75
msprobe/pytorch/hook_module/utils.py +28 -2
msprobe/pytorch/monitor/csv2tb.py +14 -4
msprobe/pytorch/monitor/data_writers.py +259 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +336 -241
msprobe/pytorch/monitor/module_metric.py +17 -0
msprobe/pytorch/monitor/optimizer_collect.py +244 -224
msprobe/pytorch/monitor/utils.py +84 -4
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +13 -2
msprobe/pytorch/online_dispatch/dump_compare.py +8 -2
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +5 -4
msprobe/pytorch/pt_config.py +16 -11
msprobe/pytorch/pytorch_service.py +70 -0
msprobe/visualization/builder/graph_builder.py +69 -10
msprobe/visualization/builder/msprobe_adapter.py +24 -12
msprobe/visualization/compare/graph_comparator.py +63 -51
msprobe/visualization/compare/mode_adapter.py +22 -20
msprobe/visualization/graph/base_node.py +11 -4
msprobe/visualization/graph/distributed_analyzer.py +1 -10
msprobe/visualization/graph/graph.py +2 -13
msprobe/visualization/graph/node_op.py +1 -2
msprobe/visualization/graph_service.py +251 -104
msprobe/visualization/utils.py +26 -44
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -140
msprobe/mindspore/monitor/anomaly_detect.py +0 -404
msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
msprobe/mindspore/service.py +0 -543
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
msprobe/pytorch/monitor/anomaly_detect.py +0 -410
msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
msprobe/pytorch/service.py +0 -470
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
/msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
/msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0

msprobe/core/compare/highlight.py CHANGED Viewed

@@ -30,12 +30,7 @@ from msprobe.core.common.file_utils import save_workbook
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import get_header_index, safe_get_value
 from msprobe.core.compare.utils import table_value_is_valid, get_name_and_state, CompareException
-class HighlightCheck(abc.ABC):
-    @abc.abstractmethod
-    def apply(self, info, color_columns, dump_mode):
-        raise NotImplementedError
+from msprobe.core.compare.config import ModeConfig
 def add_highlight_row_info(color_list, num, highlight_err_msg):
@@ -46,6 +41,12 @@ def add_highlight_row_info(color_list, num, highlight_err_msg):
     color_list.append((num, [highlight_err_msg]))
+class HighlightCheck(abc.ABC):
+    @abc.abstractmethod
+    def apply(self, info, color_columns, dump_mode):
+        raise NotImplementedError
 class CheckOrderMagnitude(HighlightCheck):
     """检查Max diff的数量级差异"""
@@ -75,12 +76,12 @@ class CheckOneThousandErrorRatio(HighlightCheck):
         if (api_in[one_thousand_index] > CompareConst.ONE_THOUSAND_ERROR_IN_RED and
                 api_out[one_thousand_index] < CompareConst.ONE_THOUSAND_ERROR_OUT_RED):
             add_highlight_row_info(color_columns.red, num,
-                                   "The input/parameters's one thousandth err ratio exceeds 0.9, "
+                                   "The input/parameter's one thousandth err ratio exceeds 0.9, "
                                    "while the output's is below 0.6")
         elif api_in[one_thousand_index] - api_out[one_thousand_index] > CompareConst.ONE_THOUSAND_ERROR_DIFF_YELLOW:
             add_highlight_row_info(color_columns.yellow, num,
                                    "The output's one thousandth err ratio decreases by more than 0.1 "
-                                   "compared to the input/parameters's")
+                                   "compared to the input/parameter's")
 class CheckCosineSimilarity(HighlightCheck):
@@ -94,7 +95,7 @@ class CheckCosineSimilarity(HighlightCheck):
         if api_in[cosine_index] - api_out[cosine_index] > CompareConst.COSINE_DIFF_YELLOW:
             add_highlight_row_info(color_columns.yellow, num,
                                    "The output's cosine decreases by more than 0.1 "
-                                   "compared to the input/parameters's")
+                                   "compared to the input/parameter's")
 class CheckMaxRelativeDiff(HighlightCheck):
@@ -117,7 +118,7 @@ class CheckMaxRelativeDiff(HighlightCheck):
               input_max_relative_diff < CompareConst.MAX_RELATIVE_IN_YELLOW):
             add_highlight_row_info(color_columns.yellow, num,
                                    "The output's maximum relative error exceeds 0.1, "
-                                   "while the input/parameters's is below 0.01")
+                                   "while the input/parameter's is below 0.01")
 class CheckOverflow(HighlightCheck):
@@ -146,84 +147,19 @@ class HighlightRules:
     }
     # 用于比较输入和输出的规则
+    # 真实数据检查规则
     compare_rules = {
         "check_order_magnitude": CheckOrderMagnitude(),
         "check_one_thousand_error": CheckOneThousandErrorRatio(),
         "check_cosine_similarity": CheckCosineSimilarity()
     }
+    # 统计量数据检查规则
     summary_compare_rules = {
         "check_order_magnitude": CheckOrderMagnitude(),
         "check_max_relative_diff": CheckMaxRelativeDiff(),
     }
-def check_indices_numeric(api_items, indices: list):
-    """检查指定索引处的值是否都为数字类型（int 或 float）"""
-    return all(isinstance(api_items[i], (float, int)) for i in indices)
-def apply_comparison_rules(api_info, dump_mode, color_columns):
-    """output与input/params的比较"""
-    if dump_mode == Const.SUMMARY:
-        for rule in HighlightRules.summary_compare_rules.values():
-            rule.apply(api_info, color_columns, dump_mode)
-    else:
-        for rule in HighlightRules.compare_rules.values():
-            rule.apply(api_info, color_columns, dump_mode)
-def find_error_rows(result, api_batch, highlight_dict, dump_mode):
-    """找到单个API中需要高亮的行"""
-    if dump_mode == Const.MD5:
-        return
-    npu_max_index = get_header_index(CompareConst.NPU_MAX, dump_mode)
-    bench_max_index = get_header_index(CompareConst.BENCH_MAX, dump_mode)
-    max_diff_index = get_header_index(CompareConst.MAX_DIFF if dump_mode == Const.SUMMARY
-                                      else CompareConst.MAX_ABS_ERR, dump_mode)
-    red_lines, yellow_lines = [], []
-    LineInfo = namedtuple('LineInfo', ['line_data', 'num_pointer'])
-    ApiInfo = namedtuple('ApiInfo', ['api_input', 'api_output', 'num_pointer'])
-    ColorColumns = namedtuple('ColorColumns', ['red', 'yellow'])
-    color_columns = ColorColumns(red=red_lines, yellow=yellow_lines)
-    api_batch_start = api_batch.start  # result_df的input起始全局索引
-    api_batch_params_end_index = api_batch.params_end_index  # result_df的params结束全局索引 + 1
-    api_batch_output_end_index = api_batch.output_end_index  # result_df的output结束全局索引 + 1
-    api_batch_params_slice_index_local = api_batch_params_end_index - api_batch_start  # result的params结束局部切片索引
-    api_batch_output_slice_index_local = api_batch_output_end_index - api_batch_start  # result的output结束局部切片索引
-    # 对单行API的输入或输出进行误差判断
-    for i, line in enumerate(result):
-        index = api_batch_start + i
-        line_info = LineInfo(line_data=line, num_pointer=index)
-        for rule in HighlightRules.basic_rules.values():
-            rule.apply(line_info, color_columns, dump_mode)
-    # 对API的输出与输入比较，进行误差判断
-    for n, api_out in enumerate(result[api_batch_params_slice_index_local: api_batch_output_slice_index_local]):
-        index = api_batch_start + api_batch_params_slice_index_local + n
-        # 单行检查只有溢出检查（红色），如果已经溢出，不进一步检查
-        if index in red_lines:
-            continue
-        if not check_indices_numeric(api_out, [npu_max_index, bench_max_index, max_diff_index]):
-            continue
-        # input/parameters的比较检查, 这里api_in包括input、parameters
-        for _, api_in in enumerate(result[0: api_batch_params_slice_index_local]):
-            if not check_indices_numeric(api_in, [npu_max_index, bench_max_index, max_diff_index]):
-                continue
-            api_info = ApiInfo(api_input=api_in, api_output=api_out, num_pointer=index)
-            apply_comparison_rules(api_info, dump_mode, color_columns)
-    red_lines_num_set = {x[0] for x in red_lines}
-    yellow_lines_num_set = {x[0] for x in yellow_lines}
-    highlight_dict.get('red_rows', set()).update(red_lines_num_set)
-    highlight_dict.get('yellow_rows', set()).update(yellow_lines_num_set - red_lines_num_set)
-    highlight_dict.get('red_lines', []).extend(red_lines)
-    highlight_dict.get('yellow_lines', []).extend(yellow_lines)
 class ApiBatch:
     def __init__(self, api_name: str, start: int):
         self.api_name = api_name
@@ -257,159 +193,225 @@ class ApiBatch:
         self.params_grad_end_index += 1
-def api_batches_update(api_batches, api_name, state, index):
-    """
-    当一个api的所有item更新完后，input, output的索引范围：
-    input: [start: start+input_len]
-    output: [start+input_len: output_end_index]
-    params: [output_end_index: params_end_index]
-    """
-    if not api_batches:
-        api_batches.append(ApiBatch(api_name, index))
-    else:
-        api_batch = api_batches[-1]
-        if api_batch.api_name == api_name or (
-                not re.search(Const.REGEX_FORWARD_BACKWARD, api_name) and api_name in api_batch.api_name):
-            try:
-                api_batch.increment(state)
-            except ValueError as e:
-                logger.error(f"api_batch: {api_batch} with invalid state, please check! {e}")
-                raise CompareException(CompareException.INVALID_STATE_ERROR) from e
-        else:
-            api_batches.append(ApiBatch(api_name, index))
+class HighLight:
+    def __init__(self, mode_config: ModeConfig):
+        self.mode_config = mode_config
-def find_compare_result_error_rows(result_df, highlight_dict, dump_mode):
-    """将dataframe根据API分组，并找到有误差的算子用于高亮"""
-    result = result_df.values
-    api_batches = []
-    for i, res_i in enumerate(result):
-        api_full_name = safe_get_value(res_i, 0, "res_i")
-        api_name, state = get_name_and_state(api_full_name)
-        api_batches_update(api_batches, api_name, state, i)
-    with tqdm(total=len(api_batches), desc="API/Module Analyse Progress", unit="item", ncols=100) as progress_bar:
-        for api_batch in api_batches:
-            find_error_rows(result[api_batch.start: api_batch.params_grad_end_index], api_batch, highlight_dict,
-                            dump_mode)
-            progress_bar.update(1)
-def value_check(value, api_name=None, i=None, result_df_columns=None):
-    if not table_value_is_valid(value):
-        if result_df_columns:
-            logger.error(f"Malicious value [{value}] at api_name [{api_name}], column [{result_df_columns[i]}], "
-                         f"is not allowed to be written into the compare result xlsx.")
+    @staticmethod
+    def api_batches_update(api_batches, api_name, state, index):
+        """
+        当一个api的所有item更新完后，input, output的索引范围：
+        input: [start: start+input_len]
+        output: [start+input_len: output_end_index]
+        params: [output_end_index: params_end_index]
+        """
+        if not api_batches:
+            api_batches.append(ApiBatch(api_name, index))
         else:
-            logger.error(f"Malicious value [{value}] is not allowed to be written into the compare result xlsx.")
-def df_malicious_value_check(df_chunk, result_df_columns):
-    for row in df_chunk.itertuples(index=False):
-        api_name = row[0]
-        for i, value in enumerate(row):
-            value_check(value, api_name, i, result_df_columns)
-def handle_multi_process_malicious_value_check(func, result_df):
-    result_total_nums = len(result_df)
-    process_num = int((multiprocessing.cpu_count() + 1) / 2)
-    if result_total_nums <= process_num:
-        process_num = 1
-        chunks = [result_df]
-    else:
-        chunk_size = result_total_nums // process_num
-        chunks = [result_df.iloc[i: i + chunk_size] for i in range(0, result_total_nums, chunk_size)]
-    pool = multiprocessing.Pool(process_num)
-    def err_call(args):
-        logger.error("Multiprocessing malicious value check failed! Reason: {}".format(args))
-        try:
-            pool.terminate()
-        except OSError:
-            logger.error("Pool terminate failed")
-    result_df_columns = result_df.columns.tolist()
-    for column in result_df_columns:
-        value_check(column)
-    for df_chunk in chunks:
-        pool.apply_async(func, args=(df_chunk, result_df_columns,), error_callback=err_call)
-    pool.close()
-    pool.join()
-def compare_result_df_convert(value):
-    if not isinstance(value, (float, int)) or isinstance(value, bool):  # bool类型或者非数字类型转str
-        value = f"{str(value)}\t" if str(value) in ("inf", "-inf", "nan") else str(value)
-    if isinstance(value, float):
-        value = f"{str(value)}\t" if str(value) in ("inf", "-inf", "nan") else value
-    return value
-def highlight_rows_xlsx(result_df, highlight_dict, file_path):
-    """Write and highlight results in Excel"""
+            api_batch = api_batches[-1]
+            if api_batch.api_name == api_name or (
+                    not re.search(Const.REGEX_FORWARD_BACKWARD, api_name) and api_name in api_batch.api_name):
+                try:
+                    api_batch.increment(state)
+                except ValueError as e:
+                    logger.error(f"api_batch: {api_batch} with invalid state, please check! {e}")
+                    raise CompareException(CompareException.INVALID_STATE_ERROR) from e
+            else:
+                api_batches.append(ApiBatch(api_name, index))
+    @staticmethod
+    def check_indices_numeric(api_items, indices: list):
+        """检查指定索引处的值是否都为数字类型（int 或 float）"""
+        return all(isinstance(api_items[i], (float, int)) for i in indices)
+    @staticmethod
+    def update_highlight_err_msg(result_df, highlight_dict):
+        if result_df.shape[1] <= 1:
+            return
-    update_highlight_err_msg(result_df, highlight_dict)  # add highlight err_msg
+        if CompareConst.NPU_MD5 in result_df.columns:
+            return
-    wb = openpyxl.Workbook()
-    ws = wb.active
+        err_msg = result_df.get(CompareConst.ERROR_MESSAGE)
+        red_lines_num_set = highlight_dict.get('red_rows')
+        for color in ['red', 'yellow']:
+            line_key = f'{color}_lines'
+            lines = highlight_dict.get(line_key, [])
+            for line_index, messages in lines:
+                if color == 'yellow' and line_index in red_lines_num_set:
+                    continue  # 如果是 yellow 行，且已被 red 行覆盖，跳过
+                for msg in messages:
+                    if err_msg[line_index] == '':
+                        err_msg[line_index] = msg
+                    else:
+                        err_msg[line_index] += '\n' + msg
+                if color == 'red':
+                    red_lines_num_set.add(line_index)
+        result_df[CompareConst.ERROR_MESSAGE] = err_msg
+    @staticmethod
+    def compare_result_df_convert(value):
+        if not isinstance(value, (float, int)) or isinstance(value, bool):  # bool类型或者非数字类型转str
+            value = f"{str(value)}\t" if str(value) in ("inf", "-inf", "nan") else str(value)
+        if isinstance(value, float):
+            value = f"{str(value)}\t" if str(value) in ("inf", "-inf", "nan") else value
+        return value
+    @staticmethod
+    def value_check(value, api_name=None, i=None, result_df_columns=None):
+        if not table_value_is_valid(value):
+            if result_df_columns:
+                logger.error(f"Malicious value [{value}] at api_name [{api_name}], column [{result_df_columns[i]}], "
+                             f"is not allowed to be written into the compare result xlsx.")
+            else:
+                logger.error(f"Malicious value [{value}] is not allowed to be written into the compare result xlsx.")
+    def find_compare_result_error_rows(self, result_df, highlight_dict):
+        """将dataframe根据API分组，并找到有误差的算子用于高亮"""
+        result = result_df.values
+        api_batches = []
+        for i, res_i in enumerate(result):
+            api_full_name = safe_get_value(res_i, 0, "res_i")
+            api_name, state = get_name_and_state(api_full_name)
+            self.api_batches_update(api_batches, api_name, state, i)
+        with tqdm(total=len(api_batches), desc="API/Module Analyse Progress", unit="item", ncols=100) as progress_bar:
+            for api_batch in api_batches:
+                self.find_error_rows(result[api_batch.start: api_batch.params_grad_end_index], api_batch,
+                                     highlight_dict)
+                progress_bar.update(1)
+    def find_error_rows(self, result, api_batch, highlight_dict):
+        """找到单个API中需要高亮的行"""
+        if self.mode_config.dump_mode == Const.MD5:
+            return
+        npu_max_index = get_header_index(CompareConst.NPU_MAX, self.mode_config.dump_mode)
+        bench_max_index = get_header_index(CompareConst.BENCH_MAX, self.mode_config.dump_mode)
+        max_diff_index = get_header_index(CompareConst.MAX_DIFF if self.mode_config.dump_mode == Const.SUMMARY
+                                          else CompareConst.MAX_ABS_ERR, self.mode_config.dump_mode)
+        red_lines, yellow_lines = [], []
+        LineInfo = namedtuple('LineInfo', ['line_data', 'num_pointer'])
+        ApiInfo = namedtuple('ApiInfo', ['api_input', 'api_output', 'num_pointer'])
+        ColorColumns = namedtuple('ColorColumns', ['red', 'yellow'])
+        color_columns = ColorColumns(red=red_lines, yellow=yellow_lines)
+        api_batch_start = api_batch.start  # result_df的input起始全局索引
+        api_batch_params_end_index = api_batch.params_end_index  # result_df的params结束全局索引 + 1
+        api_batch_output_end_index = api_batch.output_end_index  # result_df的output结束全局索引 + 1
+        api_batch_params_slice_index_local = api_batch_params_end_index - api_batch_start  # result的params结束局部切片索引
+        api_batch_output_slice_index_local = api_batch_output_end_index - api_batch_start  # result的output结束局部切片索引
+        # 对单行API的输入或输出进行误差判断
+        for i, line in enumerate(result):
+            index = api_batch_start + i
+            line_info = LineInfo(line_data=line, num_pointer=index)
+            for rule in HighlightRules.basic_rules.values():
+                rule.apply(line_info, color_columns, self.mode_config.dump_mode)
+        # 对API的输出与输入比较，进行误差判断
+        for n, api_out in enumerate(result[api_batch_params_slice_index_local: api_batch_output_slice_index_local]):
+            index = api_batch_start + api_batch_params_slice_index_local + n
+            # 单行检查只有溢出检查（红色），如果已经溢出，不进一步检查
+            if index in red_lines:
+                continue
+            if not self.check_indices_numeric(api_out, [npu_max_index, bench_max_index, max_diff_index]):
+                continue
-    # write header
-    logger.info('Initializing Excel file.')
+            # input/parameters的比较检查, 这里api_in包括input、parameters
+            for api_in in result[0: api_batch_params_slice_index_local]:
+                if not self.check_indices_numeric(api_in, [npu_max_index, bench_max_index, max_diff_index]):
+                    continue
+                api_info = ApiInfo(api_input=api_in, api_output=api_out, num_pointer=index)
+                self.apply_comparison_rules(api_info, color_columns)
+        red_lines_num_set = {x[0] for x in red_lines}
+        yellow_lines_num_set = {x[0] for x in yellow_lines}
+        highlight_dict.get('red_rows', set()).update(red_lines_num_set)
+        highlight_dict.get('yellow_rows', set()).update(yellow_lines_num_set - red_lines_num_set)
+        highlight_dict.get('red_lines', []).extend(red_lines)
+        highlight_dict.get('yellow_lines', []).extend(yellow_lines)
+    def apply_comparison_rules(self, api_info, color_columns):
+        """output与input/params的比较"""
+        if self.mode_config.dump_mode == Const.SUMMARY:
+            for rule in HighlightRules.summary_compare_rules.values():
+                rule.apply(api_info, color_columns, self.mode_config.dump_mode)
+        else:
+            for rule in HighlightRules.compare_rules.values():
+                rule.apply(api_info, color_columns, self.mode_config.dump_mode)
-    handle_multi_process_malicious_value_check(df_malicious_value_check, result_df)
+    def highlight_rows_xlsx(self, result_df, highlight_dict, file_path):
+        """Write and highlight results in Excel"""
-    result_df_convert = result_df.applymap(compare_result_df_convert)
+        self.update_highlight_err_msg(result_df, highlight_dict)  # add highlight err_msg
-    for row in dataframe_to_rows(result_df_convert, index=False, header=True):
-        ws.append(row)
+        wb = openpyxl.Workbook()
+        ws = wb.active
-    # 对可疑数据标色
-    logger.info('Coloring Excel in progress.')
-    col_len = len(result_df.columns)
-    red_fill = PatternFill(
-        start_color=CompareConst.RED, end_color=CompareConst.RED, fill_type="solid"
-    )
-    yellow_fill = PatternFill(
-        start_color=CompareConst.YELLOW, end_color=CompareConst.YELLOW, fill_type="solid",
-    )
-    for i in highlight_dict.get("red_rows", []):
-        for j in range(1, col_len + 1):
-            ws.cell(row=i + 2, column=j).fill = red_fill  # 2因为ws.cell中的row或column需要>=1,数据从第2行开始
-    for i in highlight_dict.get("yellow_rows", []):
-        for j in range(1, col_len + 1):
-            ws.cell(row=i + 2, column=j).fill = yellow_fill
+        # write header
+        logger.info('Initializing Excel file.')
-    logger.info('Saving Excel file to disk: %s' % file_path)
-    save_workbook(wb, file_path)
+        self.handle_multi_process_malicious_value_check(self.df_malicious_value_check, result_df)
+        result_df_convert = result_df.applymap(self.compare_result_df_convert)
-def update_highlight_err_msg(result_df, highlight_dict):
-    if result_df.shape[1] <= 1:
-        return
+        for row in dataframe_to_rows(result_df_convert, index=False, header=True):
+            ws.append(row)
-    if CompareConst.NPU_MD5 in result_df.columns:
-        return
+        # 对可疑数据标色
+        logger.info('Coloring Excel in progress.')
+        col_len = len(result_df.columns)
+        red_fill = PatternFill(
+            start_color=CompareConst.RED, end_color=CompareConst.RED, fill_type="solid"
+        )
+        yellow_fill = PatternFill(
+            start_color=CompareConst.YELLOW, end_color=CompareConst.YELLOW, fill_type="solid",
+        )
+        for i in highlight_dict.get("red_rows", []):
+            for j in range(1, col_len + 1):
+                ws.cell(row=i + 2, column=j).fill = red_fill  # 2因为ws.cell中的row或column需要>=1,数据从第2行开始
+        for i in highlight_dict.get("yellow_rows", []):
+            for j in range(1, col_len + 1):
+                ws.cell(row=i + 2, column=j).fill = yellow_fill
-    err_msg = result_df.get(CompareConst.ERROR_MESSAGE)
-    red_lines_num_set = highlight_dict.get('red_rows')
+        logger.info('Saving Excel file to disk: %s' % file_path)
+        save_workbook(wb, file_path)
-    for color in ['red', 'yellow']:
-        line_key = f'{color}_lines'
-        lines = highlight_dict.get(line_key, [])
-        for line_index, messages in lines:
-            if color == 'yellow' and line_index in red_lines_num_set:
-                continue  # 如果是 yellow 行，且已被 red 行覆盖，跳过
+    def handle_multi_process_malicious_value_check(self, func, result_df):
+        result_total_nums = len(result_df)
+        process_num = int((multiprocessing.cpu_count() + 1) / 2)
-            for msg in messages:
-                if err_msg[line_index] == '':
-                    err_msg[line_index] = msg
-                else:
-                    err_msg[line_index] += '\n' + msg
+        if result_total_nums <= process_num:
+            process_num = 1
+            chunks = [result_df]
+        else:
+            chunk_size = result_total_nums // process_num
+            chunks = [result_df.iloc[i: i + chunk_size] for i in range(0, result_total_nums, chunk_size)]
-            if color == 'red':
-                red_lines_num_set.add(line_index)
+        pool = multiprocessing.Pool(process_num)
-    result_df[CompareConst.ERROR_MESSAGE] = err_msg
+        def err_call(args):
+            logger.error("Multiprocessing malicious value check failed! Reason: {}".format(args))
+            try:
+                pool.close()
+            except OSError:
+                logger.error("Pool terminate failed")
+        result_df_columns = result_df.columns.tolist()
+        for column in result_df_columns:
+            self.value_check(column)
+        for df_chunk in chunks:
+            pool.apply_async(func, args=(df_chunk, result_df_columns,), error_callback=err_call)
+        pool.close()
+        pool.join()
+    def df_malicious_value_check(self, df_chunk, result_df_columns):
+        for row in df_chunk.itertuples(index=False):
+            api_name = row[0]
+            for i, value in enumerate(row):
+                self.value_check(value, api_name, i, result_df_columns)

msprobe/core/compare/layer_mapping/layer_mapping.py CHANGED Viewed

@@ -23,7 +23,7 @@ from msprobe.core.common.utils import (add_time_with_yaml,
                                        get_stack_construct_by_dump_json_path)
 from msprobe.core.compare.layer_mapping.data_scope_parser import get_dump_data_items
 from msprobe.core.compare.utils import read_op, reorder_op_name_list
+from msprobe.core.common.decorator import recursion_depth_decorator
 class LayerTrie:
@@ -71,6 +71,7 @@ class LayerTrie:
         file_path = os.path.join(os.path.realpath(output_path), file_name)
         save_yaml(file_path, result)
+    @recursion_depth_decorator("LayerMapping: LayerTrie.convert_to_dict", max_depth=100)
     def convert_to_dict(self, node):
         result = {}
         result["data_item"] = {st: [dt.data_name for dt in dts] for st, dts in node.data_items.items()}
@@ -163,6 +164,8 @@ def preprocess_layer_mapping(mapping):
         for key, value in name_map.items():
             key_list = key.split('.')
             prefix = key_list[0]  # 取前缀
+            value_list = value.split('(')
+            value = value_list[0]  # 取前缀
             key_len = len(key_list)
             if prefix not in final_mapping[type_name]:
                 final_mapping[type_name][prefix] = []

msprobe/core/compare/merge_result/merge_result.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -21,7 +21,8 @@ from functools import partial
 import pandas as pd
 from tqdm import tqdm
-from msprobe.core.common.file_utils import load_yaml, logger, FileChecker, save_excel, read_xlsx, create_directory
+from msprobe.core.common.file_utils import load_yaml, logger, FileChecker, save_excel, read_xlsx, create_directory, \
+    remove_path
 from msprobe.core.common.const import FileCheckConst, Const, CompareConst
 from msprobe.core.common.utils import CompareException, add_time_with_xlsx
 from msprobe.core.compare.utils import table_value_is_valid
@@ -32,8 +33,8 @@ def check_compare_result_name(file_name):
     """
     check whether the compare result name is as expected
     """
-    single_rank_pattern = r"^compare_result_rank-rank_\d{14}.xlsx$"
-    multi_ranks_pattern = r"^compare_result_rank(\d+)-rank\1_\d{14}.xlsx$"
+    single_rank_pattern = r"^compare_result_(rank|rank-rank)_\d{14}\.xlsx$"
+    multi_ranks_pattern = r"^compare_result_rank(\d+)(?:-rank\1)?_\d{14}\.xlsx$"
     if re.match(multi_ranks_pattern, file_name):
         return True
     if re.match(single_rank_pattern, file_name):
@@ -47,7 +48,7 @@ def reorder_path(compare_result_path_list):
     """
     reorder compare results by rank num
     """
-    rank_pattern = r"compare_result_rank(\d+)-rank"
+    rank_pattern = r"compare_result_rank(\d+)"
     reorder_path_list = sorted(
         compare_result_path_list,
         key=lambda path: int(re.search(rank_pattern, os.path.basename(path)).group(1))
@@ -63,6 +64,7 @@ def get_result_path(input_dir):
                                 for f in os.listdir(input_dir) if f.endswith(FileCheckConst.XLSX_SUFFIX)]
     filt_compare_result_path_list = []
     for file_path in compare_result_path_list:
+        FileChecker(file_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE).common_check()
         file_name = os.path.basename(file_path)
         if check_compare_result_name(file_name):
             compare_result_path_checker = FileChecker(file_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE)
@@ -236,7 +238,7 @@ def handle_multi_process(func, func_args, lock):
     def err_call(args):
         logger.error('Multiprocess merge result failed! Reason: {}'.format(args))
         try:
-            pool.terminate()
+            pool.close()
         except OSError:
             logger.error("Pool terminate failed")
@@ -329,6 +331,10 @@ def generate_merge_result(all_compare_index_dict_list, all_rank_num_list, all_co
     for i, df in enumerate(merge_df_list):
         # merge_df_list中df与compare_index_list中compare_index一一对应
         final_result_df_list.append((df, compare_index_list[i]))
+    if os.path.exists(output_path):
+        logger.warning(f"{output_path} will be deleted.")
+        remove_path(output_path)
     save_excel(output_path, final_result_df_list)
     logger.info(f"The compare results of the multi-ranks are merged and saved in: {output_path}.")

mindstudio-probe 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl

mindstudio-probe 1.2.2py3-none-any.whl → 8.1.0py3-none-any.whl