PyPI - mindstudio-probe - Versions diffs - 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl - Mend

mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (213)

{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/METADATA +4 -2
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/RECORD +204 -152
msprobe/README.md +32 -1
msprobe/core/__init__.py +17 -0
msprobe/core/common/const.py +120 -21
msprobe/core/common/exceptions.py +2 -2
msprobe/core/common/file_utils.py +279 -50
msprobe/core/common/framework_adapter.py +169 -0
msprobe/core/common/global_lock.py +86 -0
msprobe/core/common/runtime.py +25 -0
msprobe/core/common/utils.py +136 -45
msprobe/core/common_config.py +7 -0
msprobe/core/compare/acc_compare.py +646 -428
msprobe/core/compare/check.py +36 -103
msprobe/core/compare/compare_cli.py +4 -0
msprobe/core/compare/config.py +72 -0
msprobe/core/compare/highlight.py +215 -215
msprobe/core/compare/layer_mapping/layer_mapping.py +2 -0
msprobe/core/compare/merge_result/merge_result.py +4 -4
msprobe/core/compare/multiprocessing_compute.py +223 -110
msprobe/core/compare/npy_compare.py +2 -4
msprobe/core/compare/utils.py +214 -244
msprobe/core/config_check/__init__.py +17 -0
msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
msprobe/core/config_check/checkers/base_checker.py +60 -0
msprobe/core/config_check/checkers/dataset_checker.py +138 -0
msprobe/core/config_check/checkers/env_args_checker.py +96 -0
msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
msprobe/core/config_check/checkers/pip_checker.py +90 -0
msprobe/core/config_check/checkers/random_checker.py +367 -0
msprobe/core/config_check/checkers/weights_checker.py +147 -0
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
msprobe/core/config_check/config_check_cli.py +51 -0
msprobe/core/config_check/config_checker.py +100 -0
msprobe/{mindspore/runtime.py → core/config_check/resource/dependency.yaml} +7 -4
msprobe/core/config_check/resource/env.yaml +57 -0
msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
msprobe/core/config_check/utils/utils.py +107 -0
msprobe/core/data_dump/api_registry.py +67 -4
msprobe/core/data_dump/data_collector.py +170 -89
msprobe/core/data_dump/data_processor/base.py +72 -51
msprobe/core/data_dump/data_processor/mindspore_processor.py +109 -55
msprobe/core/data_dump/data_processor/pytorch_processor.py +90 -82
msprobe/core/data_dump/json_writer.py +143 -27
msprobe/core/debugger/precision_debugger.py +144 -0
msprobe/core/grad_probe/constant.py +1 -1
msprobe/core/grad_probe/grad_compare.py +1 -1
msprobe/core/grad_probe/utils.py +1 -1
msprobe/core/hook_manager.py +242 -0
msprobe/core/monitor/anomaly_processor.py +384 -0
msprobe/core/service.py +357 -0
msprobe/core/single_save/__init__.py +0 -0
msprobe/core/single_save/single_comparator.py +243 -0
msprobe/core/single_save/single_saver.py +146 -0
msprobe/docs/01.installation.md +6 -5
msprobe/docs/02.config_introduction.md +79 -22
msprobe/docs/03.config_examples.md +1 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +118 -49
msprobe/docs/06.data_dump_MindSpore.md +167 -20
msprobe/docs/07.accuracy_checker_PyTorch.md +2 -2
msprobe/docs/08.accuracy_checker_online_PyTorch.md +69 -9
msprobe/docs/09.accuracy_checker_MindSpore.md +18 -6
msprobe/docs/10.accuracy_compare_PyTorch.md +212 -74
msprobe/docs/11.accuracy_compare_MindSpore.md +87 -37
msprobe/docs/12.overflow_check_PyTorch.md +2 -2
msprobe/docs/13.overflow_check_MindSpore.md +2 -2
msprobe/docs/14.data_parse_PyTorch.md +3 -3
msprobe/docs/17.grad_probe.md +2 -1
msprobe/docs/18.online_dispatch.md +2 -2
msprobe/docs/19.monitor.md +90 -44
msprobe/docs/21.visualization_PyTorch.md +68 -15
msprobe/docs/22.visualization_MindSpore.md +71 -18
msprobe/docs/25.tool_function_introduction.md +23 -22
msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
msprobe/docs/27.dump_json_instruction.md +1 -1
msprobe/docs/28.debugger_save_instruction.md +111 -20
msprobe/docs/29.data_dump_MSAdapter.md +2 -2
msprobe/docs/30.overflow_check_MSAdapter.md +2 -2
msprobe/docs/31.config_check.md +95 -0
msprobe/docs/32.ckpt_compare.md +69 -0
msprobe/docs/33.generate_operator_MindSpore.md +181 -0
msprobe/docs/34.RL_collect.md +92 -0
msprobe/docs/35.nan_analyze.md +72 -0
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/mindspore/__init__.py +1 -2
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +150 -58
msprobe/mindspore/api_accuracy_checker/api_runner.py +7 -3
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +47 -69
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
msprobe/mindspore/api_accuracy_checker/compute_element.py +0 -1
msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -2
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +460 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +9 -0
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/cell_processor.py +204 -33
msprobe/mindspore/code_mapping/graph_parser.py +4 -21
msprobe/mindspore/common/const.py +17 -7
msprobe/mindspore/common/utils.py +128 -11
msprobe/mindspore/compare/common_dir_compare.py +382 -0
msprobe/mindspore/compare/distributed_compare.py +2 -26
msprobe/mindspore/compare/ms_compare.py +17 -405
msprobe/mindspore/compare/ms_graph_compare.py +14 -5
msprobe/mindspore/compare/utils.py +37 -0
msprobe/mindspore/debugger/debugger_config.py +53 -3
msprobe/mindspore/debugger/precision_debugger.py +72 -91
msprobe/mindspore/dump/cell_dump_process.py +877 -0
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +864 -0
msprobe/mindspore/dump/dump_tool_factory.py +13 -5
msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
msprobe/mindspore/dump/hook_cell/api_register.py +40 -6
msprobe/mindspore/dump/hook_cell/hook_cell.py +18 -7
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +18 -0
msprobe/mindspore/dump/jit_dump.py +21 -18
msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -15
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +12 -6
msprobe/mindspore/free_benchmark/common/utils.py +1 -1
msprobe/mindspore/grad_probe/global_context.py +7 -2
msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
msprobe/mindspore/mindspore_service.py +114 -0
msprobe/mindspore/monitor/common_func.py +52 -0
msprobe/mindspore/monitor/data_writers.py +237 -0
msprobe/mindspore/monitor/features.py +20 -7
msprobe/mindspore/monitor/module_hook.py +281 -209
msprobe/mindspore/monitor/optimizer_collect.py +334 -0
msprobe/mindspore/monitor/utils.py +25 -5
msprobe/mindspore/ms_config.py +16 -15
msprobe/mindspore/task_handler_factory.py +5 -2
msprobe/msprobe.py +19 -0
msprobe/nan_analyze/__init__.py +14 -0
msprobe/nan_analyze/analyzer.py +255 -0
msprobe/nan_analyze/graph.py +189 -0
msprobe/nan_analyze/utils.py +211 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +20 -20
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +4 -7
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +204 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +12 -11
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +1 -0
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +8 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +2 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +156 -0
msprobe/pytorch/attl_manager.py +65 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
msprobe/pytorch/common/utils.py +26 -14
msprobe/pytorch/compare/distributed_compare.py +4 -36
msprobe/pytorch/compare/pt_compare.py +13 -84
msprobe/pytorch/compare/utils.py +47 -0
msprobe/pytorch/debugger/debugger_config.py +34 -17
msprobe/pytorch/debugger/precision_debugger.py +66 -118
msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
msprobe/pytorch/dump/module_dump/module_dump.py +11 -58
msprobe/pytorch/dump/module_dump/module_processer.py +143 -113
msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
msprobe/pytorch/hook_module/api_register.py +29 -5
msprobe/pytorch/hook_module/hook_module.py +9 -18
msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +22 -1
msprobe/pytorch/hook_module/utils.py +28 -2
msprobe/pytorch/monitor/csv2tb.py +6 -2
msprobe/pytorch/monitor/data_writers.py +259 -0
msprobe/pytorch/monitor/module_hook.py +227 -158
msprobe/pytorch/monitor/module_metric.py +14 -0
msprobe/pytorch/monitor/optimizer_collect.py +242 -270
msprobe/pytorch/monitor/utils.py +16 -3
msprobe/pytorch/online_dispatch/dispatch.py +4 -2
msprobe/pytorch/online_dispatch/dump_compare.py +5 -2
msprobe/pytorch/parse_tool/lib/utils.py +3 -3
msprobe/pytorch/pt_config.py +8 -7
msprobe/pytorch/pytorch_service.py +73 -0
msprobe/visualization/builder/graph_builder.py +33 -13
msprobe/visualization/builder/msprobe_adapter.py +24 -11
msprobe/visualization/compare/graph_comparator.py +53 -45
msprobe/visualization/compare/mode_adapter.py +31 -1
msprobe/visualization/graph/base_node.py +3 -3
msprobe/visualization/graph/graph.py +2 -2
msprobe/visualization/graph_service.py +250 -103
msprobe/visualization/utils.py +27 -11
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -106
msprobe/mindspore/monitor/anomaly_detect.py +0 -404
msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
msprobe/mindspore/service.py +0 -549
msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
msprobe/pytorch/monitor/anomaly_detect.py +0 -410
msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
msprobe/pytorch/service.py +0 -473
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
/msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
/msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0

msprobe/core/compare/highlight.py CHANGED Viewed

@@ -30,12 +30,7 @@ from msprobe.core.common.file_utils import save_workbook
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import get_header_index, safe_get_value
 from msprobe.core.compare.utils import table_value_is_valid, get_name_and_state, CompareException
-class HighlightCheck(abc.ABC):
-    @abc.abstractmethod
-    def apply(self, info, color_columns, dump_mode):
-        raise NotImplementedError
+from msprobe.core.compare.config import ModeConfig
 def add_highlight_row_info(color_list, num, highlight_err_msg):
@@ -46,6 +41,12 @@ def add_highlight_row_info(color_list, num, highlight_err_msg):
     color_list.append((num, [highlight_err_msg]))
+class HighlightCheck(abc.ABC):
+    @abc.abstractmethod
+    def apply(self, info, color_columns, dump_mode):
+        raise NotImplementedError
 class CheckOrderMagnitude(HighlightCheck):
     """检查Max diff的数量级差异"""
@@ -75,12 +76,12 @@ class CheckOneThousandErrorRatio(HighlightCheck):
         if (api_in[one_thousand_index] > CompareConst.ONE_THOUSAND_ERROR_IN_RED and
                 api_out[one_thousand_index] < CompareConst.ONE_THOUSAND_ERROR_OUT_RED):
             add_highlight_row_info(color_columns.red, num,
-                                   "The input/parameters's one thousandth err ratio exceeds 0.9, "
+                                   "The input/parameter's one thousandth err ratio exceeds 0.9, "
                                    "while the output's is below 0.6")
         elif api_in[one_thousand_index] - api_out[one_thousand_index] > CompareConst.ONE_THOUSAND_ERROR_DIFF_YELLOW:
             add_highlight_row_info(color_columns.yellow, num,
                                    "The output's one thousandth err ratio decreases by more than 0.1 "
-                                   "compared to the input/parameters's")
+                                   "compared to the input/parameter's")
 class CheckCosineSimilarity(HighlightCheck):
@@ -94,7 +95,7 @@ class CheckCosineSimilarity(HighlightCheck):
         if api_in[cosine_index] - api_out[cosine_index] > CompareConst.COSINE_DIFF_YELLOW:
             add_highlight_row_info(color_columns.yellow, num,
                                    "The output's cosine decreases by more than 0.1 "
-                                   "compared to the input/parameters's")
+                                   "compared to the input/parameter's")
 class CheckMaxRelativeDiff(HighlightCheck):
@@ -117,7 +118,7 @@ class CheckMaxRelativeDiff(HighlightCheck):
               input_max_relative_diff < CompareConst.MAX_RELATIVE_IN_YELLOW):
             add_highlight_row_info(color_columns.yellow, num,
                                    "The output's maximum relative error exceeds 0.1, "
-                                   "while the input/parameters's is below 0.01")
+                                   "while the input/parameter's is below 0.01")
 class CheckOverflow(HighlightCheck):
@@ -159,73 +160,6 @@ class HighlightRules:
     }
-def check_indices_numeric(api_items, indices: list):
-    """检查指定索引处的值是否都为数字类型（int 或 float）"""
-    return all(isinstance(api_items[i], (float, int)) for i in indices)
-def apply_comparison_rules(api_info, dump_mode, color_columns):
-    """output与input/params的比较"""
-    if dump_mode == Const.SUMMARY:
-        for rule in HighlightRules.summary_compare_rules.values():
-            rule.apply(api_info, color_columns, dump_mode)
-    else:
-        for rule in HighlightRules.compare_rules.values():
-            rule.apply(api_info, color_columns, dump_mode)
-def find_error_rows(result, api_batch, highlight_dict, dump_mode):
-    """找到单个API中需要高亮的行"""
-    if dump_mode == Const.MD5:
-        return
-    npu_max_index = get_header_index(CompareConst.NPU_MAX, dump_mode)
-    bench_max_index = get_header_index(CompareConst.BENCH_MAX, dump_mode)
-    max_diff_index = get_header_index(CompareConst.MAX_DIFF if dump_mode == Const.SUMMARY
-                                      else CompareConst.MAX_ABS_ERR, dump_mode)
-    red_lines, yellow_lines = [], []
-    LineInfo = namedtuple('LineInfo', ['line_data', 'num_pointer'])
-    ApiInfo = namedtuple('ApiInfo', ['api_input', 'api_output', 'num_pointer'])
-    ColorColumns = namedtuple('ColorColumns', ['red', 'yellow'])
-    color_columns = ColorColumns(red=red_lines, yellow=yellow_lines)
-    api_batch_start = api_batch.start  # result_df的input起始全局索引
-    api_batch_params_end_index = api_batch.params_end_index  # result_df的params结束全局索引 + 1
-    api_batch_output_end_index = api_batch.output_end_index  # result_df的output结束全局索引 + 1
-    api_batch_params_slice_index_local = api_batch_params_end_index - api_batch_start  # result的params结束局部切片索引
-    api_batch_output_slice_index_local = api_batch_output_end_index - api_batch_start  # result的output结束局部切片索引
-    # 对单行API的输入或输出进行误差判断
-    for i, line in enumerate(result):
-        index = api_batch_start + i
-        line_info = LineInfo(line_data=line, num_pointer=index)
-        for rule in HighlightRules.basic_rules.values():
-            rule.apply(line_info, color_columns, dump_mode)
-    # 对API的输出与输入比较，进行误差判断
-    for n, api_out in enumerate(result[api_batch_params_slice_index_local: api_batch_output_slice_index_local]):
-        index = api_batch_start + api_batch_params_slice_index_local + n
-        # 单行检查只有溢出检查（红色），如果已经溢出，不进一步检查
-        if index in red_lines:
-            continue
-        if not check_indices_numeric(api_out, [npu_max_index, bench_max_index, max_diff_index]):
-            continue
-        # input/parameters的比较检查, 这里api_in包括input、parameters
-        for _, api_in in enumerate(result[0: api_batch_params_slice_index_local]):
-            if not check_indices_numeric(api_in, [npu_max_index, bench_max_index, max_diff_index]):
-                continue
-            api_info = ApiInfo(api_input=api_in, api_output=api_out, num_pointer=index)
-            apply_comparison_rules(api_info, dump_mode, color_columns)
-    red_lines_num_set = {x[0] for x in red_lines}
-    yellow_lines_num_set = {x[0] for x in yellow_lines}
-    highlight_dict.get('red_rows', set()).update(red_lines_num_set)
-    highlight_dict.get('yellow_rows', set()).update(yellow_lines_num_set - red_lines_num_set)
-    highlight_dict.get('red_lines', []).extend(red_lines)
-    highlight_dict.get('yellow_lines', []).extend(yellow_lines)
 class ApiBatch:
     def __init__(self, api_name: str, start: int):
         self.api_name = api_name
@@ -259,159 +193,225 @@ class ApiBatch:
         self.params_grad_end_index += 1
-def api_batches_update(api_batches, api_name, state, index):
-    """
-    当一个api的所有item更新完后，input, output的索引范围：
-    input: [start: start+input_len]
-    output: [start+input_len: output_end_index]
-    params: [output_end_index: params_end_index]
-    """
-    if not api_batches:
-        api_batches.append(ApiBatch(api_name, index))
-    else:
-        api_batch = api_batches[-1]
-        if api_batch.api_name == api_name or (
-                not re.search(Const.REGEX_FORWARD_BACKWARD, api_name) and api_name in api_batch.api_name):
-            try:
-                api_batch.increment(state)
-            except ValueError as e:
-                logger.error(f"api_batch: {api_batch} with invalid state, please check! {e}")
-                raise CompareException(CompareException.INVALID_STATE_ERROR) from e
-        else:
-            api_batches.append(ApiBatch(api_name, index))
+class HighLight:
+    def __init__(self, mode_config: ModeConfig):
+        self.mode_config = mode_config
-def find_compare_result_error_rows(result_df, highlight_dict, dump_mode):
-    """将dataframe根据API分组，并找到有误差的算子用于高亮"""
-    result = result_df.values
-    api_batches = []
-    for i, res_i in enumerate(result):
-        api_full_name = safe_get_value(res_i, 0, "res_i")
-        api_name, state = get_name_and_state(api_full_name)
-        api_batches_update(api_batches, api_name, state, i)
-    with tqdm(total=len(api_batches), desc="API/Module Analyse Progress", unit="item", ncols=100) as progress_bar:
-        for api_batch in api_batches:
-            find_error_rows(result[api_batch.start: api_batch.params_grad_end_index], api_batch, highlight_dict,
-                            dump_mode)
-            progress_bar.update(1)
-def value_check(value, api_name=None, i=None, result_df_columns=None):
-    if not table_value_is_valid(value):
-        if result_df_columns:
-            logger.error(f"Malicious value [{value}] at api_name [{api_name}], column [{result_df_columns[i]}], "
-                         f"is not allowed to be written into the compare result xlsx.")
+    @staticmethod
+    def api_batches_update(api_batches, api_name, state, index):
+        """
+        当一个api的所有item更新完后，input, output的索引范围：
+        input: [start: start+input_len]
+        output: [start+input_len: output_end_index]
+        params: [output_end_index: params_end_index]
+        """
+        if not api_batches:
+            api_batches.append(ApiBatch(api_name, index))
         else:
-            logger.error(f"Malicious value [{value}] is not allowed to be written into the compare result xlsx.")
-def df_malicious_value_check(df_chunk, result_df_columns):
-    for row in df_chunk.itertuples(index=False):
-        api_name = row[0]
-        for i, value in enumerate(row):
-            value_check(value, api_name, i, result_df_columns)
-def handle_multi_process_malicious_value_check(func, result_df):
-    result_total_nums = len(result_df)
-    process_num = int((multiprocessing.cpu_count() + 1) / 2)
-    if result_total_nums <= process_num:
-        process_num = 1
-        chunks = [result_df]
-    else:
-        chunk_size = result_total_nums // process_num
-        chunks = [result_df.iloc[i: i + chunk_size] for i in range(0, result_total_nums, chunk_size)]
-    pool = multiprocessing.Pool(process_num)
-    def err_call(args):
-        logger.error("Multiprocessing malicious value check failed! Reason: {}".format(args))
-        try:
-            pool.terminate()
-        except OSError:
-            logger.error("Pool terminate failed")
-    result_df_columns = result_df.columns.tolist()
-    for column in result_df_columns:
-        value_check(column)
-    for df_chunk in chunks:
-        pool.apply_async(func, args=(df_chunk, result_df_columns,), error_callback=err_call)
-    pool.close()
-    pool.join()
-def compare_result_df_convert(value):
-    if not isinstance(value, (float, int)) or isinstance(value, bool):  # bool类型或者非数字类型转str
-        value = f"{str(value)}\t" if str(value) in ("inf", "-inf", "nan") else str(value)
-    if isinstance(value, float):
-        value = f"{str(value)}\t" if str(value) in ("inf", "-inf", "nan") else value
-    return value
-def highlight_rows_xlsx(result_df, highlight_dict, file_path):
-    """Write and highlight results in Excel"""
+            api_batch = api_batches[-1]
+            if api_batch.api_name == api_name or (
+                    not re.search(Const.REGEX_FORWARD_BACKWARD, api_name) and api_name in api_batch.api_name):
+                try:
+                    api_batch.increment(state)
+                except ValueError as e:
+                    logger.error(f"api_batch: {api_batch} with invalid state, please check! {e}")
+                    raise CompareException(CompareException.INVALID_STATE_ERROR) from e
+            else:
+                api_batches.append(ApiBatch(api_name, index))
+    @staticmethod
+    def check_indices_numeric(api_items, indices: list):
+        """检查指定索引处的值是否都为数字类型（int 或 float）"""
+        return all(isinstance(api_items[i], (float, int)) for i in indices)
+    @staticmethod
+    def update_highlight_err_msg(result_df, highlight_dict):
+        if result_df.shape[1] <= 1:
+            return
-    update_highlight_err_msg(result_df, highlight_dict)  # add highlight err_msg
+        if CompareConst.NPU_MD5 in result_df.columns:
+            return
-    wb = openpyxl.Workbook()
-    ws = wb.active
+        err_msg = result_df.get(CompareConst.ERROR_MESSAGE)
+        red_lines_num_set = highlight_dict.get('red_rows')
+        for color in ['red', 'yellow']:
+            line_key = f'{color}_lines'
+            lines = highlight_dict.get(line_key, [])
+            for line_index, messages in lines:
+                if color == 'yellow' and line_index in red_lines_num_set:
+                    continue  # 如果是 yellow 行，且已被 red 行覆盖，跳过
+                for msg in messages:
+                    if err_msg[line_index] == '':
+                        err_msg[line_index] = msg
+                    else:
+                        err_msg[line_index] += '\n' + msg
+                if color == 'red':
+                    red_lines_num_set.add(line_index)
+        result_df[CompareConst.ERROR_MESSAGE] = err_msg
+    @staticmethod
+    def compare_result_df_convert(value):
+        if not isinstance(value, (float, int)) or isinstance(value, bool):  # bool类型或者非数字类型转str
+            value = f"{str(value)}\t" if str(value) in ("inf", "-inf", "nan") else str(value)
+        if isinstance(value, float):
+            value = f"{str(value)}\t" if str(value) in ("inf", "-inf", "nan") else value
+        return value
+    @staticmethod
+    def value_check(value, api_name=None, i=None, result_df_columns=None):
+        if not table_value_is_valid(value):
+            if result_df_columns:
+                logger.error(f"Malicious value [{value}] at api_name [{api_name}], column [{result_df_columns[i]}], "
+                             f"is not allowed to be written into the compare result xlsx.")
+            else:
+                logger.error(f"Malicious value [{value}] is not allowed to be written into the compare result xlsx.")
+    def find_compare_result_error_rows(self, result_df, highlight_dict):
+        """将dataframe根据API分组，并找到有误差的算子用于高亮"""
+        result = result_df.values
+        api_batches = []
+        for i, res_i in enumerate(result):
+            api_full_name = safe_get_value(res_i, 0, "res_i")
+            api_name, state = get_name_and_state(api_full_name)
+            self.api_batches_update(api_batches, api_name, state, i)
+        with tqdm(total=len(api_batches), desc="API/Module Analyse Progress", unit="item", ncols=100) as progress_bar:
+            for api_batch in api_batches:
+                self.find_error_rows(result[api_batch.start: api_batch.params_grad_end_index], api_batch,
+                                     highlight_dict)
+                progress_bar.update(1)
+    def find_error_rows(self, result, api_batch, highlight_dict):
+        """找到单个API中需要高亮的行"""
+        if self.mode_config.dump_mode == Const.MD5:
+            return
+        npu_max_index = get_header_index(CompareConst.NPU_MAX, self.mode_config.dump_mode)
+        bench_max_index = get_header_index(CompareConst.BENCH_MAX, self.mode_config.dump_mode)
+        max_diff_index = get_header_index(CompareConst.MAX_DIFF if self.mode_config.dump_mode == Const.SUMMARY
+                                          else CompareConst.MAX_ABS_ERR, self.mode_config.dump_mode)
+        red_lines, yellow_lines = [], []
+        LineInfo = namedtuple('LineInfo', ['line_data', 'num_pointer'])
+        ApiInfo = namedtuple('ApiInfo', ['api_input', 'api_output', 'num_pointer'])
+        ColorColumns = namedtuple('ColorColumns', ['red', 'yellow'])
+        color_columns = ColorColumns(red=red_lines, yellow=yellow_lines)
+        api_batch_start = api_batch.start  # result_df的input起始全局索引
+        api_batch_params_end_index = api_batch.params_end_index  # result_df的params结束全局索引 + 1
+        api_batch_output_end_index = api_batch.output_end_index  # result_df的output结束全局索引 + 1
+        api_batch_params_slice_index_local = api_batch_params_end_index - api_batch_start  # result的params结束局部切片索引
+        api_batch_output_slice_index_local = api_batch_output_end_index - api_batch_start  # result的output结束局部切片索引
+        # 对单行API的输入或输出进行误差判断
+        for i, line in enumerate(result):
+            index = api_batch_start + i
+            line_info = LineInfo(line_data=line, num_pointer=index)
+            for rule in HighlightRules.basic_rules.values():
+                rule.apply(line_info, color_columns, self.mode_config.dump_mode)
+        # 对API的输出与输入比较，进行误差判断
+        for n, api_out in enumerate(result[api_batch_params_slice_index_local: api_batch_output_slice_index_local]):
+            index = api_batch_start + api_batch_params_slice_index_local + n
+            # 单行检查只有溢出检查（红色），如果已经溢出，不进一步检查
+            if index in red_lines:
+                continue
+            if not self.check_indices_numeric(api_out, [npu_max_index, bench_max_index, max_diff_index]):
+                continue
-    # write header
-    logger.info('Initializing Excel file.')
+            # input/parameters的比较检查, 这里api_in包括input、parameters
+            for api_in in result[0: api_batch_params_slice_index_local]:
+                if not self.check_indices_numeric(api_in, [npu_max_index, bench_max_index, max_diff_index]):
+                    continue
+                api_info = ApiInfo(api_input=api_in, api_output=api_out, num_pointer=index)
+                self.apply_comparison_rules(api_info, color_columns)
+        red_lines_num_set = {x[0] for x in red_lines}
+        yellow_lines_num_set = {x[0] for x in yellow_lines}
+        highlight_dict.get('red_rows', set()).update(red_lines_num_set)
+        highlight_dict.get('yellow_rows', set()).update(yellow_lines_num_set - red_lines_num_set)
+        highlight_dict.get('red_lines', []).extend(red_lines)
+        highlight_dict.get('yellow_lines', []).extend(yellow_lines)
+    def apply_comparison_rules(self, api_info, color_columns):
+        """output与input/params的比较"""
+        if self.mode_config.dump_mode == Const.SUMMARY:
+            for rule in HighlightRules.summary_compare_rules.values():
+                rule.apply(api_info, color_columns, self.mode_config.dump_mode)
+        else:
+            for rule in HighlightRules.compare_rules.values():
+                rule.apply(api_info, color_columns, self.mode_config.dump_mode)
-    handle_multi_process_malicious_value_check(df_malicious_value_check, result_df)
+    def highlight_rows_xlsx(self, result_df, highlight_dict, file_path):
+        """Write and highlight results in Excel"""
-    result_df_convert = result_df.applymap(compare_result_df_convert)
+        self.update_highlight_err_msg(result_df, highlight_dict)  # add highlight err_msg
-    for row in dataframe_to_rows(result_df_convert, index=False, header=True):
-        ws.append(row)
+        wb = openpyxl.Workbook()
+        ws = wb.active
-    # 对可疑数据标色
-    logger.info('Coloring Excel in progress.')
-    col_len = len(result_df.columns)
-    red_fill = PatternFill(
-        start_color=CompareConst.RED, end_color=CompareConst.RED, fill_type="solid"
-    )
-    yellow_fill = PatternFill(
-        start_color=CompareConst.YELLOW, end_color=CompareConst.YELLOW, fill_type="solid",
-    )
-    for i in highlight_dict.get("red_rows", []):
-        for j in range(1, col_len + 1):
-            ws.cell(row=i + 2, column=j).fill = red_fill  # 2因为ws.cell中的row或column需要>=1,数据从第2行开始
-    for i in highlight_dict.get("yellow_rows", []):
-        for j in range(1, col_len + 1):
-            ws.cell(row=i + 2, column=j).fill = yellow_fill
+        # write header
+        logger.info('Initializing Excel file.')
-    logger.info('Saving Excel file to disk: %s' % file_path)
-    save_workbook(wb, file_path)
+        self.handle_multi_process_malicious_value_check(self.df_malicious_value_check, result_df)
+        result_df_convert = result_df.applymap(self.compare_result_df_convert)
-def update_highlight_err_msg(result_df, highlight_dict):
-    if result_df.shape[1] <= 1:
-        return
+        for row in dataframe_to_rows(result_df_convert, index=False, header=True):
+            ws.append(row)
-    if CompareConst.NPU_MD5 in result_df.columns:
-        return
+        # 对可疑数据标色
+        logger.info('Coloring Excel in progress.')
+        col_len = len(result_df.columns)
+        red_fill = PatternFill(
+            start_color=CompareConst.RED, end_color=CompareConst.RED, fill_type="solid"
+        )
+        yellow_fill = PatternFill(
+            start_color=CompareConst.YELLOW, end_color=CompareConst.YELLOW, fill_type="solid",
+        )
+        for i in highlight_dict.get("red_rows", []):
+            for j in range(1, col_len + 1):
+                ws.cell(row=i + 2, column=j).fill = red_fill  # 2因为ws.cell中的row或column需要>=1,数据从第2行开始
+        for i in highlight_dict.get("yellow_rows", []):
+            for j in range(1, col_len + 1):
+                ws.cell(row=i + 2, column=j).fill = yellow_fill
-    err_msg = result_df.get(CompareConst.ERROR_MESSAGE)
-    red_lines_num_set = highlight_dict.get('red_rows')
+        logger.info('Saving Excel file to disk: %s' % file_path)
+        save_workbook(wb, file_path)
-    for color in ['red', 'yellow']:
-        line_key = f'{color}_lines'
-        lines = highlight_dict.get(line_key, [])
-        for line_index, messages in lines:
-            if color == 'yellow' and line_index in red_lines_num_set:
-                continue  # 如果是 yellow 行，且已被 red 行覆盖，跳过
+    def handle_multi_process_malicious_value_check(self, func, result_df):
+        result_total_nums = len(result_df)
+        process_num = int((multiprocessing.cpu_count() + 1) / 2)
-            for msg in messages:
-                if err_msg[line_index] == '':
-                    err_msg[line_index] = msg
-                else:
-                    err_msg[line_index] += '\n' + msg
+        if result_total_nums <= process_num:
+            process_num = 1
+            chunks = [result_df]
+        else:
+            chunk_size = result_total_nums // process_num
+            chunks = [result_df.iloc[i: i + chunk_size] for i in range(0, result_total_nums, chunk_size)]
-            if color == 'red':
-                red_lines_num_set.add(line_index)
+        pool = multiprocessing.Pool(process_num)
-    result_df[CompareConst.ERROR_MESSAGE] = err_msg
+        def err_call(args):
+            logger.error("Multiprocessing malicious value check failed! Reason: {}".format(args))
+            try:
+                pool.close()
+            except OSError:
+                logger.error("Pool terminate failed")
+        result_df_columns = result_df.columns.tolist()
+        for column in result_df_columns:
+            self.value_check(column)
+        for df_chunk in chunks:
+            pool.apply_async(func, args=(df_chunk, result_df_columns,), error_callback=err_call)
+        pool.close()
+        pool.join()
+    def df_malicious_value_check(self, df_chunk, result_df_columns):
+        for row in df_chunk.itertuples(index=False):
+            api_name = row[0]
+            for i, value in enumerate(row):
+                self.value_check(value, api_name, i, result_df_columns)

msprobe/core/compare/layer_mapping/layer_mapping.py CHANGED Viewed

@@ -164,6 +164,8 @@ def preprocess_layer_mapping(mapping):
         for key, value in name_map.items():
             key_list = key.split('.')
             prefix = key_list[0]  # 取前缀
+            value_list = value.split('(')
+            value = value_list[0]  # 取前缀
             key_len = len(key_list)
             if prefix not in final_mapping[type_name]:
                 final_mapping[type_name][prefix] = []

msprobe/core/compare/merge_result/merge_result.py CHANGED Viewed

@@ -33,8 +33,8 @@ def check_compare_result_name(file_name):
     """
     check whether the compare result name is as expected
     """
-    single_rank_pattern = r"^compare_result_rank-rank_\d{14}.xlsx$"
-    multi_ranks_pattern = r"^compare_result_rank(\d+)-rank\1_\d{14}.xlsx$"
+    single_rank_pattern = r"^compare_result_(rank|rank-rank)_\d{14}\.xlsx$"
+    multi_ranks_pattern = r"^compare_result_rank(\d+)(?:-rank\1)?_\d{14}\.xlsx$"
     if re.match(multi_ranks_pattern, file_name):
         return True
     if re.match(single_rank_pattern, file_name):
@@ -48,7 +48,7 @@ def reorder_path(compare_result_path_list):
     """
     reorder compare results by rank num
     """
-    rank_pattern = r"compare_result_rank(\d+)-rank"
+    rank_pattern = r"compare_result_rank(\d+)"
     reorder_path_list = sorted(
         compare_result_path_list,
         key=lambda path: int(re.search(rank_pattern, os.path.basename(path)).group(1))
@@ -238,7 +238,7 @@ def handle_multi_process(func, func_args, lock):
     def err_call(args):
         logger.error('Multiprocess merge result failed! Reason: {}'.format(args))
         try:
-            pool.terminate()
+            pool.close()
         except OSError:
             logger.error("Pool terminate failed")

mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl

mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl