PyPI - mindstudio-probe - Versions diffs - 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/METADATA +3 -3
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/RECORD +143 -144
msprobe/README.md +25 -20
msprobe/core/common/const.py +110 -66
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +3 -1
msprobe/core/common/file_utils.py +25 -2
msprobe/core/common/utils.py +30 -34
msprobe/core/compare/acc_compare.py +43 -74
msprobe/core/compare/check.py +2 -6
msprobe/core/compare/highlight.py +2 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +2 -1
msprobe/core/compare/merge_result/merge_result.py +8 -2
msprobe/core/compare/multiprocessing_compute.py +19 -12
msprobe/core/compare/npy_compare.py +30 -12
msprobe/core/compare/utils.py +20 -10
msprobe/core/data_dump/api_registry.py +176 -0
msprobe/core/data_dump/data_processor/base.py +2 -2
msprobe/core/data_dump/data_processor/mindspore_processor.py +19 -32
msprobe/core/data_dump/data_processor/pytorch_processor.py +45 -15
msprobe/core/data_dump/json_writer.py +38 -35
msprobe/core/grad_probe/constant.py +1 -0
msprobe/core/grad_probe/grad_compare.py +1 -1
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/docs/01.installation.md +2 -1
msprobe/docs/02.config_introduction.md +17 -15
msprobe/docs/05.data_dump_PyTorch.md +70 -2
msprobe/docs/06.data_dump_MindSpore.md +33 -12
msprobe/docs/07.accuracy_checker_PyTorch.md +11 -1
msprobe/docs/08.accuracy_checker_online_PyTorch.md +3 -1
msprobe/docs/09.accuracy_checker_MindSpore.md +1 -1
msprobe/docs/10.accuracy_compare_PyTorch.md +59 -33
msprobe/docs/11.accuracy_compare_MindSpore.md +40 -16
msprobe/docs/12.overflow_check_PyTorch.md +3 -1
msprobe/docs/13.overflow_check_MindSpore.md +4 -2
msprobe/docs/14.data_parse_PyTorch.md +1 -7
msprobe/docs/18.online_dispatch.md +1 -1
msprobe/docs/19.monitor.md +124 -62
msprobe/docs/21.visualization_PyTorch.md +32 -13
msprobe/docs/22.visualization_MindSpore.md +32 -13
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/27.dump_json_instruction.md +278 -8
msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +4 -3
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +6 -1
msprobe/mindspore/api_accuracy_checker/api_runner.py +19 -9
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +602 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -1
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +2 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/common/const.py +61 -0
msprobe/mindspore/common/utils.py +31 -19
msprobe/mindspore/compare/ms_compare.py +27 -19
msprobe/mindspore/compare/ms_graph_compare.py +6 -5
msprobe/mindspore/debugger/debugger_config.py +6 -4
msprobe/mindspore/debugger/precision_debugger.py +22 -10
msprobe/mindspore/dump/dump_tool_factory.py +5 -3
msprobe/mindspore/dump/hook_cell/api_register.py +142 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +9 -10
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +24 -26
msprobe/mindspore/dump/jit_dump.py +14 -9
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +22 -56
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -1
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +10 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +2 -0
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
msprobe/mindspore/monitor/module_hook.py +354 -302
msprobe/mindspore/monitor/utils.py +46 -4
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/service.py +23 -17
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +11 -6
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +2 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +4 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +5 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +25 -6
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -19
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
msprobe/pytorch/common/utils.py +29 -7
msprobe/pytorch/debugger/precision_debugger.py +10 -1
msprobe/pytorch/dump/module_dump/module_dump.py +4 -3
msprobe/pytorch/dump/module_dump/module_processer.py +12 -6
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +1 -1
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/hook_module/api_register.py +131 -0
msprobe/pytorch/hook_module/hook_module.py +19 -14
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +172 -75
msprobe/pytorch/monitor/csv2tb.py +8 -2
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +131 -105
msprobe/pytorch/monitor/module_metric.py +3 -0
msprobe/pytorch/monitor/optimizer_collect.py +55 -4
msprobe/pytorch/monitor/unittest/test_monitor.py +1 -1
msprobe/pytorch/monitor/utils.py +68 -1
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +9 -0
msprobe/pytorch/online_dispatch/dump_compare.py +3 -0
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +2 -1
msprobe/pytorch/pt_config.py +11 -7
msprobe/pytorch/service.py +11 -8
msprobe/visualization/builder/graph_builder.py +44 -5
msprobe/visualization/builder/msprobe_adapter.py +0 -1
msprobe/visualization/compare/graph_comparator.py +42 -38
msprobe/visualization/compare/mode_adapter.py +0 -19
msprobe/visualization/graph/base_node.py +8 -1
msprobe/visualization/graph/distributed_analyzer.py +1 -10
msprobe/visualization/graph/graph.py +0 -11
msprobe/visualization/graph/node_op.py +1 -2
msprobe/visualization/graph_service.py +1 -1
msprobe/visualization/utils.py +2 -33
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
msprobe/pytorch/parse.py +0 -19
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/top_level.txt +0 -0

msprobe/mindspore/common/const.py CHANGED Viewed

@@ -70,6 +70,67 @@ class Const:
     }
+class MsCompareConst:
+    # api_info field
+    MINT = "Mint"
+    MINT_FUNCTIONAL = "MintFunctional"
+    TENSOR_API = "Tensor"
+    FUNCTIONAL_API = "Functional"
+    FUSION_API = "FUSION"
+    API_NAME_STR_LENGTH = 4
+    MAX_RECURSION_DEPTH = 20
+    # Mindtorch api_info field
+    MINDTORCH_TENSOR = "Tensor"
+    MINDTORCH = "Torch"
+    MINDTORCH_FUNC = "Functional"
+    MINDTORCH_NPU = "NPU"
+    MINDTORCH_DIST = "Distributed"
+    MT_VALID_API_TYPES = [
+        MINDTORCH, MINDTORCH_FUNC, MINDTORCH_TENSOR
+    ]
+    SUPPORTED_FUSION_LIST = ["flash_attention_score"]
+    TASK_FIELD = "task"
+    STATISTICS_TASK = "statistics"
+    FRAMEWORK = "framework"
+    TENSOR_TASK = "tensor"
+    DUMP_DATA_DIR_FIELD = "dump_data_dir"
+    DATA_FIELD = "data"
+    # supported api yaml
+    SUPPORTED_API_LIST_FILE = "checker_support_api.yaml"
+    SUPPORTED_TENSOR_LIST_KEY = "tensor"
+    # detail_csv
+    DETAIL_CSV_API_NAME = "API Name"
+    DETAIL_CSV_BENCH_DTYPE = "Bench Dtype"
+    DETAIL_CSV_TESTED_DTYPE = "Tested Dtype"
+    DETAIL_CSV_SHAPE = "Shape"
+    DETAIL_CSV_PASS_STATUS = "Status"
+    DETAIL_CSV_MESSAGE = "Message"
+    DETAIL_CSV_FILE_NAME = "accuracy_checking_details"
+    # result_csv
+    RESULT_CSV_FORWARD_TEST_SUCCESS = "Forward Test Success"
+    RESULT_CSV_BACKWARD_TEST_SUCCESS = "Backward Test Success"
+    RESULT_CSV_FILE_NAME = "accuracy_checking_result"
+    EPSILON = 1e-8
+    class ProcessStatus:
+        SUCCESS = "success"
+        API_NOT_FOUND = "api_not_found"
+        EXCEPTION_SKIP = "exception_skip"
 class FreeBenchmarkConst:
     ADD_NOISE = "add_noise"
     BIT_NOISE = "bit_noise"

msprobe/mindspore/common/utils.py CHANGED Viewed

@@ -25,7 +25,31 @@ from msprobe.core.common.exceptions import DistributedNotInitializedError
 from msprobe.core.common.file_utils import path_len_exceeds_limit, check_path_exists, save_npy
 from msprobe.core.common.log import logger
 from msprobe.core.common.const import Const
-from msprobe.core.common.utils import CompareException, check_seed_all
+from msprobe.core.common.utils import CompareException, check_seed_all, is_save_variable_valid
+class MsprobeStep(ms.train.Callback):
+    def __init__(self, debugger):
+        super(MsprobeStep, self).__init__()
+        self.debugger = debugger
+    def on_train_step_begin(self, run_context):
+        self.debugger.start()
+    def on_train_step_end(self, run_context):
+        self.debugger.stop()
+        self.debugger.step()
+class MsprobeInitStep(ms.train.Callback):
+    def on_train_begin(self, run_context):
+        try:
+            from ms._c_expression import _set_init_iter
+        except ImportError:
+            logger.warning('MsprobeInitStep does not work on this version of MindSpore.')
+            return
+        cb_params = run_context.original_args()
+        _set_init_iter(cb_params.cur_step_num)
 def get_rank_if_initialized():
@@ -93,20 +117,6 @@ def seed_all(seed=1234, mode=False, rm_dropout=True):
         remove_dropout()
-class MsprobeStep(ms.train.Callback):
-    def __init__(self, debugger):
-        super(MsprobeStep, self).__init__()
-        self.debugger = debugger
-    def on_train_step_begin(self, run_context):
-        self.debugger.start()
-    def on_train_step_end(self, run_context):
-        self.debugger.stop()
-        self.debugger.step()
 class Dropout(ops.Dropout):
     def __init__(self, keep_prob=0.5, seed0=0, seed1=1):
         super().__init__(1., seed0, seed1)
@@ -169,7 +179,7 @@ def set_register_backward_hook_functions():
         from msprobe.mindspore.mindtorch import (_call_impl,
                                                  register_full_backward_pre_hook,
                                                  register_full_backward_hook)
-        if not hasattr(torch, "register_full_backward_hook"):
+        if not hasattr(torch.nn.Module, "register_full_backward_hook"):
             setattr(torch.nn.Module, "_call_impl", _call_impl)
             setattr(torch.nn.Module, "register_full_backward_pre_hook", register_full_backward_pre_hook)
             setattr(torch.nn.Module, "register_full_backward_hook", register_full_backward_hook)
@@ -182,9 +192,11 @@ def set_register_backward_hook_functions():
 def check_save_param(variable, name, save_backward):
     # try catch this api to skip invalid call
-    if not isinstance(variable, (list, dict, ms.Tensor, int, float, str)):
+    valid_data_types = tuple([ms.Tensor, int, float, str])
+    if not is_save_variable_valid(variable, valid_data_types):
+        valid_data_types_with_nested_types = valid_data_types + (dict, tuple, list)
         logger.warning("PrecisionDebugger.save variable type not valid, "
-                       "should be one of list, dict, ms.Tensor, int, float or string. "
+                       f"should be one of {valid_data_types_with_nested_types}"
                        "Skip current save process.")
         raise ValueError
     if not isinstance(name, str):
@@ -196,4 +208,4 @@ def check_save_param(variable, name, save_backward):
         logger.warning("PrecisionDebugger.save_backward name not valid, "
                        "should be bool. "
                        "Skip current save process.")
-        raise ValueError
+        raise ValueError

msprobe/mindspore/compare/ms_compare.py CHANGED Viewed

@@ -22,10 +22,10 @@ import pandas as pd
 from msprobe.core.common.const import CompareConst, Const
 from msprobe.core.common.exceptions import FileCheckException
-from msprobe.core.common.file_utils import FileOpen, create_directory, load_json, load_npy, load_yaml
+from msprobe.core.common.file_utils import create_directory, load_json, load_npy, load_yaml
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, \
-    check_op_str_pattern_valid, get_dump_mode, set_dump_path
+    check_op_str_pattern_valid, get_dump_mode, set_dump_path, detect_framework_by_dump_json
 from msprobe.core.compare.acc_compare import Comparator, ModeConfig
 from msprobe.core.compare.check import dtype_mapping
 from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping
@@ -78,6 +78,11 @@ class MSComparator(Comparator):
             raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got "
                             f"{type(self.data_mapping)}")
+    @staticmethod
+    def process_data_name(result):
+        result['data_name_x'] = result.apply(lambda row: [row['data_name_x'], row['data_name_y']], axis=1)
+        return result
     def calc_accuracy(self, result_df, header):
         condition_no_bench = result_df[CompareConst.BENCH_NAME] == CompareConst.N_A
         result_df[condition_no_bench] = result_df[condition_no_bench].fillna(CompareConst.N_A)
@@ -120,12 +125,13 @@ class MSComparator(Comparator):
             result_df.loc[~condition_md5_equal & ~condition_no_bench, CompareConst.RESULT] = CompareConst.DIFF
         elif self.dump_mode == Const.SUMMARY:
             warning_list = [calc_summary_diff(data_type) for data_type in ['max', 'min', 'mean', 'l2norm']]
-            warning_flag = pd.DataFrame(warning_list).all()
+            warning_flag = pd.DataFrame(warning_list).any()
             result_df.loc[~condition_no_bench, [CompareConst.RESULT, CompareConst.ERROR_MESSAGE]] = ''
             result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING
             result_df.loc[warning_flag, CompareConst.ERROR_MESSAGE] = 'Need double check api accuracy.'
         else:
-            fill_cols = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
+            fill_cols = [CompareConst.COSINE, CompareConst.EUC_DIST,
+                         CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
                          CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO,
                          CompareConst.ERROR_MESSAGE]
             result_df.loc[~condition_no_bench, fill_cols] = ''
@@ -139,6 +145,8 @@ class MSComparator(Comparator):
             header.append(CompareConst.STACK)
         if self.dump_mode == Const.ALL:
             header.append(CompareConst.DATA_NAME)
+            result = self.process_data_name(result)
         result.rename(columns={'op_name_x': CompareConst.NPU_NAME,
                                'op_name_y': CompareConst.BENCH_NAME,
                                'dtype_x': CompareConst.NPU_DTYPE,
@@ -169,6 +177,7 @@ class MSComparator(Comparator):
         result[npu_summary] = result['summary_x'].apply(set_summary).tolist()
         result[bench_summary] = result['summary_y'].apply(set_summary).tolist()
         result_df = pd.DataFrame(columns=header)
         for h in header:
             if h in result.columns:
@@ -269,15 +278,15 @@ class MSComparator(Comparator):
             bench_dtype = match_result['dtype_y']
             if self.cross_frame:
                 npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype)
-            return ((npu_dtype == bench_dtype) |
-                    ((npu_dtype == Const.FLOAT16) & (bench_dtype == Const.FLOAT32)) |
-                    ((npu_dtype == Const.FLOAT32) & (bench_dtype == Const.FLOAT16)) |
-                    ((npu_dtype == Const.FLOAT16) & (bench_dtype == Const.BFLOAT16)) |
-                    ((npu_dtype == Const.BFLOAT16) & (bench_dtype == Const.FLOAT16)) |
-                    ((npu_dtype == Const.TORCH_FLOAT16) & (bench_dtype == Const.TORCH_FLOAT32)) |
-                    ((npu_dtype == Const.TORCH_FLOAT32) & (bench_dtype == Const.TORCH_FLOAT16)) |
-                    ((npu_dtype == Const.TORCH_FLOAT16) & (bench_dtype == Const.TORCH_BFLOAT16)) |
-                    ((npu_dtype == Const.TORCH_BFLOAT16) & (bench_dtype == Const.TORCH_FLOAT16)))
+            equal_condition = npu_dtype == bench_dtype
+            match_condition = (
+                    (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin(
+                        CompareConst.DTYPE_MATCH_GROUPS[0])) |
+                    (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin(
+                        CompareConst.DTYPE_MATCH_GROUPS[1]))
+            )
+            return equal_condition | match_condition
         match_result.loc[~gen_dtype_condition(), [i + '_y' for i in bench_df.columns]] = CompareConst.N_A
         return self.make_result_df(match_result)
@@ -382,12 +391,11 @@ class MSComparator(Comparator):
 def check_cross_framework(bench_json_path):
-    pattern = r'"data_name":\s*"[^"]+\.pt"'
-    with FileOpen(bench_json_path, 'r') as file:
-        for line in file:
-            if re.search(pattern, line):
-                return True
-    return False
+    framework = detect_framework_by_dump_json(bench_json_path)
+    if framework == Const.PT_FRAMEWORK:
+        return True
+    else:
+        return False
 def ms_compare(input_param, output_path, **kwargs):

msprobe/mindspore/compare/ms_graph_compare.py CHANGED Viewed

@@ -195,11 +195,12 @@ class GraphMSComparator:
             if not error_flag:
                 result_list, err_msg = compare_ops_apply(n_value, b_value, False, "")
                 result_dict[CompareConst.COSINE] = result_list[0]
-                result_dict[CompareConst.MAX_ABS_ERR] = result_list[1]
-                result_dict[CompareConst.MAX_RELATIVE_ERR] = result_list[2]
-                result_dict[CompareConst.ONE_THOUSANDTH_ERR_RATIO] = result_list[3]
-                result_dict[CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = result_list[4]
-                result_dict[CompareConst.ACCURACY] = check_accuracy(result_list[0], result_list[1])
+                result_dict[CompareConst.EUC_DIST] = result_list[1]
+                result_dict[CompareConst.MAX_ABS_ERR] = result_list[2]
+                result_dict[CompareConst.MAX_RELATIVE_ERR] = result_list[3]
+                result_dict[CompareConst.ONE_THOUSANDTH_ERR_RATIO] = result_list[4]
+                result_dict[CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = result_list[5]
+                result_dict[CompareConst.ACCURACY] = check_accuracy(result_list[0], result_list[2])
                 result_dict[CompareConst.ERROR_MESSAGE] = err_msg
             return pd.Series(result_dict)

msprobe/mindspore/debugger/debugger_config.py CHANGED Viewed

@@ -53,11 +53,13 @@ class DebuggerConfig:
             self.stage = FreeBenchmarkConst.DEFAULT_STAGE if not task_config.fuzz_stage else task_config.fuzz_stage
             if self.handler_type == FreeBenchmarkConst.FIX and \
                     self.pert_type != FreeBenchmarkConst.DEFAULT_PERT_TYPE:
-                raise ValueError("pert_mode must be improve_precision or empty when handler_type is fix, "
-                                 f"but got {self.pert_type}.")
+                logger.error("pert_mode must be improve_precision or empty when handler_type is fix, "
+                             f"but got {self.pert_type}.")
+                raise ValueError
             if self.stage == Const.BACKWARD and self.handler_type == FreeBenchmarkConst.FIX:
-                raise ValueError("handler_type must be check or empty when fuzz_stage is backward, "
-                                 f"but got {self.handler_type}.")
+                logger.error("handler_type must be check or empty when fuzz_stage is backward, "
+                             f"but got {self.handler_type}.")
+                raise ValueError
             self.dump_level = FreeBenchmarkConst.DEFAULT_DUMP_LEVEL
     def check(self):

msprobe/mindspore/debugger/precision_debugger.py CHANGED Viewed

@@ -22,12 +22,12 @@ from mindspore._c_expression import MSContext
 from msprobe.core.common.const import Const, FileCheckConst, MsgConst
 from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.file_utils import FileChecker
-from msprobe.core.common.utils import get_real_step_or_rank
+from msprobe.core.common.utils import get_real_step_or_rank, check_init_step
 from msprobe.mindspore.cell_processor import CellProcessor
 from msprobe.mindspore.common.const import Const as MsConst
 from msprobe.mindspore.common.utils import set_register_backward_hook_functions, check_save_param
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
-from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+from msprobe.mindspore.dump.hook_cell.api_register import get_api_register
 from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
 from msprobe.mindspore.grad_probe.grad_monitor import GradientMonitor
 from msprobe.mindspore.ms_config import parse_json_config
@@ -84,7 +84,7 @@ class PrecisionDebugger:
         common_config.dump_path = dump_path if dump_path else common_config.dump_path
         self.config = DebuggerConfig(common_config, task_config)
-        if _msprobe_c:
+        if self._need_msprobe_c() and _msprobe_c:
             _msprobe_c._PrecisionDebugger(framework="MindSpore", config_path=config_path)
         self.config.execution_mode = self._get_execution_mode()
@@ -151,7 +151,7 @@ class PrecisionDebugger:
         instance = cls._instance
         if not instance:
             raise Exception(MsgConst.NOT_CREATED_INSTANCE)
-        if _msprobe_c:
+        if cls._need_msprobe_c() and _msprobe_c:
             _msprobe_c._PrecisionDebugger().start()
         if instance.task in PrecisionDebugger.task_not_need_service:
             return
@@ -163,7 +163,7 @@ class PrecisionDebugger:
             instance.service.start(model)
         else:
             if not instance.first_start:
-                api_register.api_set_ori_func()
+                get_api_register().restore_all_api()
                 handler = TaskHandlerFactory.create(instance.config)
                 handler.handle()
@@ -180,8 +180,6 @@ class PrecisionDebugger:
         instance = cls._instance
         if not instance:
             raise Exception(MsgConst.NOT_CREATED_INSTANCE)
-        if _msprobe_c:
-            _msprobe_c._PrecisionDebugger().stop()
         if instance.task == Const.GRAD_PROBE:
             instance.gm.stop()
         if instance.task in PrecisionDebugger.task_not_need_service:
@@ -195,8 +193,6 @@ class PrecisionDebugger:
         instance = cls._instance
         if not instance:
             raise Exception(MsgConst.NOT_CREATED_INSTANCE)
-        if _msprobe_c:
-            _msprobe_c._PrecisionDebugger().step()
         if instance.task in PrecisionDebugger.task_not_need_service:
             return
         if instance.service:
@@ -233,6 +229,15 @@ class PrecisionDebugger:
                 instance.service = Service(instance.config)
             instance.service.save(variable, name, save_backward)
+    @classmethod
+    def set_init_step(cls, step):
+        instance = cls._instance
+        if not instance:
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
+        check_init_step(step)
+        instance.service.init_step = step
+        instance.service.loop = 0
     @classmethod
     def _need_service(cls):
         instance = cls._instance
@@ -241,4 +246,11 @@ class PrecisionDebugger:
         if instance.config.execution_mode != MsConst.PYNATIVE_MODE:
             return False
         else:
-            return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)
+            return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)
+    @classmethod
+    def _need_msprobe_c(cls):
+        instance = cls._instance
+        if not instance:
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
+        return instance.config.level_ori == Const.LEVEL_L2

msprobe/mindspore/dump/dump_tool_factory.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +14,7 @@
 # limitations under the License.
 from msprobe.mindspore.common.const import Const
+from msprobe.core.common.log import logger
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
 from msprobe.mindspore.dump.kernel_graph_dump import KernelGraphDump
 from msprobe.mindspore.dump.kernel_kbyk_dump import KernelKbykDump
@@ -47,6 +48,7 @@ class DumpToolFactory:
             raise Exception("Valid level is needed.")
         tool = tool.get(config.execution_mode)
         if not tool:
-            raise Exception(f"Data dump is not supported in {config.execution_mode} mode "
-                            f"when dump level is {config.level}.")
+            logger.error(f"Data dump is not supported in {config.execution_mode} mode "
+                         f"when dump level is {config.level}.")
+            raise ValueError
         return tool(config)

msprobe/mindspore/dump/hook_cell/api_register.py ADDED Viewed

@@ -0,0 +1,142 @@
+# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from mindspore import Tensor, ops, mint
+from mindspore.mint.nn import functional
+from mindspore.communication import comm_func
+from msprobe.core.common.file_utils import load_yaml
+from msprobe.core.common.utils import Const
+from msprobe.core.data_dump.api_registry import ApiRegistry
+from msprobe.mindspore.common.const import Const as MsConst
+from msprobe.mindspore.common.utils import is_mindtorch
+from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
+stub_tensor_existed = True
+try:
+    from mindspore.common._stub_tensor import StubTensor
+except ImportError:
+    stub_tensor_existed = False
+cur_path = os.path.dirname(os.path.realpath(__file__))
+if not is_mindtorch():
+    _api_types = {
+        Const.MS_FRAMEWORK: {
+            Const.MS_API_TYPE_OPS: (ops, (ops,)),
+            Const.MS_API_TYPE_TENSOR: (Tensor, (Tensor,)),
+            Const.MS_API_TYPE_MINT: (mint, (mint,)),
+            Const.MS_API_TYPE_MINT_FUNC: (functional, (functional,)),
+            Const.MS_API_TYPE_COM: (comm_func, (comm_func,))
+        }
+    }
+    if stub_tensor_existed:
+        _api_types.get(Const.MS_FRAMEWORK).update(
+            {Const.MS_API_TYPE_STUB_TENSOR: (StubTensor, (StubTensor,))}
+        )
+    _supported_api_list_path = (os.path.join(cur_path, MsConst.SUPPORTED_API_LIST_FILE),)
+else:
+    import torch
+    import torch_npu
+    _api_types = {
+        Const.MT_FRAMEWORK: {
+            Const.PT_API_TYPE_FUNCTIONAL: (torch.nn.functional, (torch.nn.functional,)),
+            Const.PT_API_TYPE_TENSOR: (torch.Tensor, (torch.Tensor,)),
+            Const.PT_API_TYPE_TORCH: (torch, (torch,)),
+            Const.PT_API_TYPE_NPU: (torch_npu, (torch_npu,)),
+            Const.PT_API_TYPE_DIST: (torch.distributed, (torch.distributed, torch.distributed.distributed_c10d))
+        }
+    }
+    _supported_api_list_path = (os.path.join(cur_path, '../../../pytorch/hook_module',
+                                             MsConst.SUPPORTED_API_LIST_FILE),)
+_inner_used_api = {
+    Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_OPS: (
+        ops, "norm", "square", "sqrt", "is_complex", "stack", "is_floating_point"
+    ),
+    Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_TENSOR: (
+        Tensor, "to", "numel"
+    ),
+    Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_MINT: (
+        mint, "max", "min", "mean", "norm"
+    )
+}
+class ApiTemplate(HOOKCell):
+    def __init__(self, api_name, api_func, prefix, hook_build_func):
+        self.api_name = api_name
+        self.api_func = api_func
+        self.prefix_api_name = prefix + Const.SEP + str(api_name.split(Const.SEP)[-1]) + Const.SEP
+        super().__init__(hook_build_func)
+    @staticmethod
+    def async_to_sync(output):
+        # Fake handle, used to return after the CommHandle executes the wait method
+        fake_handle = type("FakeHandle", (), {"wait": lambda self: None})()
+        if isinstance(output, tuple) and len(output) == 2 and hasattr(output[1], "wait"):
+            output[1].wait()
+            output = (output[0], fake_handle)
+        elif hasattr(output, "wait"):
+            output.wait()
+            output = fake_handle
+        return output
+    def construct(self, *args, **kwargs):
+        if self.api_name.startswith(MsConst.DROPOUT_API_NAME_PREFIX):
+            return args[0] if args else kwargs.get(Const.INPUT)
+        output = self.api_func(*args, **kwargs)
+        if self.prefix_api_name.startswith(MsConst.DISTRIBUTED_DATA_PREFIX):
+            if kwargs.get("async_op") or self.api_name in ["isend", "irecv"]:
+                output = self.async_to_sync(output)
+        return output
+    def forward(self, *args, **kwargs):
+        if self.api_name.startswith(MsConst.DROPOUT_API_NAME_PREFIX):
+            return args[0] if args else kwargs.get(Const.INPUT)
+        return self.api_func(*args, **kwargs)
+api_register = None
+stub_tensor_set = False
+def get_api_register(return_new=False):
+    global stub_tensor_set
+    def stub_method(method):
+        def wrapped_method(*args, **kwargs):
+            return method(*args, **kwargs)
+        return wrapped_method
+    if not is_mindtorch() and stub_tensor_existed and not stub_tensor_set:
+        api_names = load_yaml(_supported_api_list_path[0]).get(Const.MS_API_TYPE_TENSOR, [])
+        for attr_name in dir(StubTensor):
+            attr = getattr(StubTensor, attr_name)
+            if attr_name in api_names and callable(attr):
+                setattr(StubTensor, attr_name, stub_method(attr))
+        stub_tensor_set = True
+    if return_new:
+        return ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate)
+    global api_register
+    if api_register is None:
+        api_register = ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate)
+    return api_register

msprobe/mindspore/dump/hook_cell/hook_cell.py CHANGED Viewed

@@ -28,23 +28,22 @@ def get_cell_count(name):
     return HOOKCell.cell_count[name]
-def __init__(self, build_hook) -> None:
+def __init__(self, hook_build_func) -> None:
     super(HOOKCell, self).__init__()
     self.changed_status = False
     self.input_kwargs = {}
-    self.prefix = ""
     if not HOOKCell.g_stop_hook:
         HOOKCell.g_stop_hook = True
         self.changed_status = True
-        if hasattr(self, "prefix_api_name"):
-            self.prefix = self.prefix_api_name
         self.forward_data_collected = False
-        forward_pre_hook, forward_hook, backward_hook, backward_pre_hook = build_hook(self.prefix)
-        self.register_forward_pre_hook(forward_pre_hook)
-        self.register_forward_hook(forward_hook)
-        register_backward_hook_functions["full"](self, backward_hook)
-        register_backward_hook_functions["pre"](self, backward_pre_hook)
+        prefix = self.prefix_api_name if hasattr(self, "prefix_api_name") else ""
+        if callable(hook_build_func):
+            forward_pre_hook, forward_hook, backward_hook, backward_pre_hook = hook_build_func(prefix)
+            self.register_forward_pre_hook(forward_pre_hook)
+            self.register_forward_hook(forward_hook)
+            register_backward_hook_functions["full"](self, backward_hook)
+            register_backward_hook_functions["pre"](self, backward_pre_hook)
 # 重载call，加全局标志。

mindstudio-probe 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

mindstudio-probe 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl