PyPI - mindstudio-probe - Versions diffs - 1.0.1__py3-none-any.whl - Mend

mindstudio-probe 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

mindstudio_probe-1.0.1.dist-info/LICENSE +201 -0
mindstudio_probe-1.0.1.dist-info/METADATA +30 -0
mindstudio_probe-1.0.1.dist-info/RECORD +228 -0
mindstudio_probe-1.0.1.dist-info/WHEEL +5 -0
mindstudio_probe-1.0.1.dist-info/entry_points.txt +2 -0
mindstudio_probe-1.0.1.dist-info/top_level.txt +1 -0
msprobe/README.md +182 -0
msprobe/__init__.py +0 -0
msprobe/config/README.md +397 -0
msprobe/config/config.json +28 -0
msprobe/config/img/free_benchmark.png +0 -0
msprobe/core/common/const.py +241 -0
msprobe/core/common/exceptions.py +88 -0
msprobe/core/common/file_check.py +265 -0
msprobe/core/common/log.py +55 -0
msprobe/core/common/utils.py +516 -0
msprobe/core/common_config.py +58 -0
msprobe/core/data_dump/data_collector.py +140 -0
msprobe/core/data_dump/data_processor/base.py +245 -0
msprobe/core/data_dump/data_processor/factory.py +61 -0
msprobe/core/data_dump/data_processor/pytorch_processor.py +346 -0
msprobe/core/data_dump/json_writer.py +116 -0
msprobe/core/data_dump/scope.py +178 -0
msprobe/mindspore/__init__.py +1 -0
msprobe/mindspore/debugger/__init__.py +0 -0
msprobe/mindspore/debugger/debugger_config.py +51 -0
msprobe/mindspore/debugger/precision_debugger.py +32 -0
msprobe/mindspore/doc/dump.md +65 -0
msprobe/mindspore/dump/__init__.py +0 -0
msprobe/mindspore/dump/api_kbk_dump.py +55 -0
msprobe/mindspore/dump/dump_tool_factory.py +38 -0
msprobe/mindspore/dump/kernel_graph_dump.py +60 -0
msprobe/mindspore/ms_config.py +78 -0
msprobe/mindspore/overflow_check/__init__.py +0 -0
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +45 -0
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +32 -0
msprobe/mindspore/task_handler_factory.py +21 -0
msprobe/msprobe.py +67 -0
msprobe/pytorch/__init__.py +4 -0
msprobe/pytorch/advisor/advisor.py +124 -0
msprobe/pytorch/advisor/advisor_const.py +59 -0
msprobe/pytorch/advisor/advisor_result.py +58 -0
msprobe/pytorch/api_accuracy_checker/.keep +0 -0
msprobe/pytorch/api_accuracy_checker/__init__.py +0 -0
msprobe/pytorch/api_accuracy_checker/common/.keep +0 -0
msprobe/pytorch/api_accuracy_checker/common/__init__.py +0 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +50 -0
msprobe/pytorch/api_accuracy_checker/common/utils.py +224 -0
msprobe/pytorch/api_accuracy_checker/compare/__init__.py +0 -0
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +216 -0
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +545 -0
msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +133 -0
msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -0
msprobe/pytorch/api_accuracy_checker/compare/compare.py +345 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +74 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +249 -0
msprobe/pytorch/api_accuracy_checker/config.yaml +4 -0
msprobe/pytorch/api_accuracy_checker/run_ut/.keep +0 -0
msprobe/pytorch/api_accuracy_checker/run_ut/__init__.py +0 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +328 -0
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +203 -0
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +127 -0
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +493 -0
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +7 -0
msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +5 -0
msprobe/pytorch/common/__init__.py +2 -0
msprobe/pytorch/common/compare_script.template +14 -0
msprobe/pytorch/common/log.py +32 -0
msprobe/pytorch/common/parse_json.py +37 -0
msprobe/pytorch/common/utils.py +224 -0
msprobe/pytorch/compare/acc_compare.py +1024 -0
msprobe/pytorch/compare/distributed_compare.py +111 -0
msprobe/pytorch/compare/highlight.py +100 -0
msprobe/pytorch/compare/mapping.yaml +607 -0
msprobe/pytorch/compare/match.py +36 -0
msprobe/pytorch/compare/npy_compare.py +244 -0
msprobe/pytorch/debugger/__init__.py +0 -0
msprobe/pytorch/debugger/debugger_config.py +86 -0
msprobe/pytorch/debugger/precision_debugger.py +95 -0
msprobe/pytorch/doc/FAQ.md +193 -0
msprobe/pytorch/doc/api_accuracy_checker.md +269 -0
msprobe/pytorch/doc/atat精度工具数据dump标准性能基线报告.md +182 -0
msprobe/pytorch/doc/dump.md +207 -0
msprobe/pytorch/doc/img/BLOOM-7B_1.png +0 -0
msprobe/pytorch/doc/img/BLOOM-7B_2.png +0 -0
msprobe/pytorch/doc/img/BLOOM-7B_3.png +0 -0
msprobe/pytorch/doc/img/BLOOM-7B_4.png +0 -0
msprobe/pytorch/doc/img/GPT-3_1.png +0 -0
msprobe/pytorch/doc/img/GPT-3_2.png +0 -0
msprobe/pytorch/doc/img/GPT-3_3.png +0 -0
msprobe/pytorch/doc/img/GPT-3_4.png +0 -0
msprobe/pytorch/doc/img/GPT-3_5.png +0 -0
msprobe/pytorch/doc/img/GPT-3_6.png +0 -0
msprobe/pytorch/doc/img/GPT-3_7.png +0 -0
msprobe/pytorch/doc/img/GPT-3_8.png +0 -0
msprobe/pytorch/doc/img/YOLOV5S_1.png +0 -0
msprobe/pytorch/doc/img/YOLOV5S_2.png +0 -0
msprobe/pytorch/doc/img/accuracy_checking_details.png +0 -0
msprobe/pytorch/doc/img/accuracy_checking_result.png +0 -0
msprobe/pytorch/doc/img/api_precision_compare_details.png +0 -0
msprobe/pytorch/doc/img/api_precision_compare_result.png +0 -0
msprobe/pytorch/doc/img/auto_analyze_log.png +0 -0
msprobe/pytorch/doc/img/compare_result_pkl.png +0 -0
msprobe/pytorch/doc/img/compare_result_pkl_md5.png.png +0 -0
msprobe/pytorch/doc/img/cpu_info.png +0 -0
msprobe/pytorch/doc/img/module_compare.png +0 -0
msprobe/pytorch/doc/parse_tool.md +286 -0
msprobe/pytorch/doc/ptdbg_ascend_compare.md +176 -0
msprobe/pytorch/doc/ptdbg_ascend_overview.md +68 -0
msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +381 -0
msprobe/pytorch/doc/run_overflow_check.md +25 -0
msprobe/pytorch/doc/在线精度比对.md +90 -0
msprobe/pytorch/free_benchmark/__init__.py +8 -0
msprobe/pytorch/free_benchmark/common/__init__.py +0 -0
msprobe/pytorch/free_benchmark/common/constant.py +67 -0
msprobe/pytorch/free_benchmark/common/counter.py +72 -0
msprobe/pytorch/free_benchmark/common/enums.py +37 -0
msprobe/pytorch/free_benchmark/common/params.py +129 -0
msprobe/pytorch/free_benchmark/common/utils.py +98 -0
msprobe/pytorch/free_benchmark/compare/grad_saver.py +183 -0
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +104 -0
msprobe/pytorch/free_benchmark/main.py +102 -0
msprobe/pytorch/free_benchmark/perturbed_layers/__init__.py +0 -0
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +13 -0
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +41 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/__init__.py +0 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +90 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +104 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +63 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +68 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +28 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +45 -0
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +19 -0
msprobe/pytorch/free_benchmark/result_handlers/__init__.py +0 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +203 -0
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +39 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +24 -0
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +31 -0
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +170 -0
msprobe/pytorch/functional/__init__.py +0 -0
msprobe/pytorch/functional/data_processor.py +0 -0
msprobe/pytorch/functional/dump_module.py +39 -0
msprobe/pytorch/hook_module/__init__.py +1 -0
msprobe/pytorch/hook_module/api_registry.py +161 -0
msprobe/pytorch/hook_module/hook_module.py +109 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1876 -0
msprobe/pytorch/hook_module/utils.py +29 -0
msprobe/pytorch/hook_module/wrap_aten.py +100 -0
msprobe/pytorch/hook_module/wrap_distributed.py +75 -0
msprobe/pytorch/hook_module/wrap_functional.py +108 -0
msprobe/pytorch/hook_module/wrap_npu_custom.py +73 -0
msprobe/pytorch/hook_module/wrap_tensor.py +72 -0
msprobe/pytorch/hook_module/wrap_torch.py +88 -0
msprobe/pytorch/hook_module/wrap_vf.py +64 -0
msprobe/pytorch/module_processer.py +98 -0
msprobe/pytorch/online_dispatch/__init__.py +20 -0
msprobe/pytorch/online_dispatch/compare.py +236 -0
msprobe/pytorch/online_dispatch/dispatch.py +274 -0
msprobe/pytorch/online_dispatch/dump_compare.py +186 -0
msprobe/pytorch/online_dispatch/single_compare.py +391 -0
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +50 -0
msprobe/pytorch/online_dispatch/utils.py +187 -0
msprobe/pytorch/parse.py +4 -0
msprobe/pytorch/parse_tool/__init__.py +0 -0
msprobe/pytorch/parse_tool/cli.py +32 -0
msprobe/pytorch/parse_tool/lib/__init__.py +0 -0
msprobe/pytorch/parse_tool/lib/compare.py +259 -0
msprobe/pytorch/parse_tool/lib/config.py +51 -0
msprobe/pytorch/parse_tool/lib/file_desc.py +31 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -0
msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -0
msprobe/pytorch/parse_tool/lib/parse_tool.py +158 -0
msprobe/pytorch/parse_tool/lib/utils.py +367 -0
msprobe/pytorch/parse_tool/lib/visualization.py +90 -0
msprobe/pytorch/pt_config.py +93 -0
msprobe/pytorch/service.py +167 -0
msprobe/test/core_ut/common/test_utils.py +345 -0
msprobe/test/core_ut/data_dump/test_data_collector.py +47 -0
msprobe/test/core_ut/data_dump/test_json_writer.py +183 -0
msprobe/test/core_ut/data_dump/test_scope.py +151 -0
msprobe/test/core_ut/test_common_config.py +152 -0
msprobe/test/core_ut/test_file_check.py +218 -0
msprobe/test/core_ut/test_log.py +109 -0
msprobe/test/mindspore_ut/test_api_kbk_dump.py +51 -0
msprobe/test/mindspore_ut/test_debugger_config.py +42 -0
msprobe/test/mindspore_ut/test_dump_tool_factory.py +51 -0
msprobe/test/mindspore_ut/test_kernel_graph_dump.py +66 -0
msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py +63 -0
msprobe/test/mindspore_ut/test_ms_config.py +69 -0
msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py +51 -0
msprobe/test/mindspore_ut/test_precision_debugger.py +56 -0
msprobe/test/mindspore_ut/test_task_handler_factory.py +58 -0
msprobe/test/pytorch_ut/advisor/test_advisor.py +83 -0
msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +108 -0
msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +39 -0
msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +112 -0
msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py +77 -0
msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py +125 -0
msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py +10 -0
msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py +43 -0
msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json +179 -0
msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json +63 -0
msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py +99 -0
msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +115 -0
msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py +72 -0
msprobe/test/pytorch_ut/compare/test_acc_compare.py +17 -0
msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py +105 -0
msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py +121 -0
msprobe/test/pytorch_ut/free_benchmark/test_main.py +101 -0
msprobe/test/pytorch_ut/functional/test_dump_module.py +15 -0
msprobe/test/pytorch_ut/hook_module/test_api_registry.py +130 -0
msprobe/test/pytorch_ut/hook_module/test_hook_module.py +42 -0
msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py +65 -0
msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py +35 -0
msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py +20 -0
msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py +35 -0
msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py +43 -0
msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py +11 -0
msprobe/test/pytorch_ut/test_pt_config.py +69 -0
msprobe/test/pytorch_ut/test_service.py +59 -0
msprobe/test/resources/advisor.txt +3 -0
msprobe/test/resources/compare_result_20230703104808.csv +9 -0
msprobe/test/resources/compare_result_without_accuracy.csv +9 -0
msprobe/test/resources/config.yaml +3 -0
msprobe/test/resources/npu_test.pkl +8 -0
msprobe/test/run_test.sh +30 -0
msprobe/test/run_ut.py +58 -0
msprobe/test/test_module_processer.py +64 -0

msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py ADDED Viewed

@@ -0,0 +1,90 @@
+import torch
+from msprobe.pytorch.free_benchmark import logger
+from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
+from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
+from msprobe.pytorch.free_benchmark.common.params import DataParams
+from msprobe.pytorch.free_benchmark.common.utils import TorchC
+from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import (
+    NpuBaseLayer,
+)
+class AddNoiseLayer(NpuBaseLayer):
+    def add_noise(self, tensor_obj):
+        if isinstance(tensor_obj, torch.Tensor):
+            self.perturbed_value = ThresholdConfig.PERTURBATION_VALUE_DICT.get(
+                tensor_obj.dtype
+            )
+            if not self.pre_check(tensor_obj):
+                return tensor_obj
+            noise = self._get_noise(tensor_obj)
+            result = TorchC.where(
+                TorchC.gt(TorchC.abs(tensor_obj), self.perturbed_value ** 0.5),
+                TorchC.add(noise, tensor_obj),
+                tensor_obj,
+            ).to(tensor_obj.dtype)
+            self.is_added = True
+            return result
+        if isinstance(tensor_obj, dict):
+            return {key: self.add_noise(value) for key, value in tensor_obj.items()}
+        if isinstance(tensor_obj, (tuple, list)):
+            return type(tensor_obj)([self.add_noise(value) for value in tensor_obj])
+        return tensor_obj
+    def handle(self, params: DataParams) -> torch.Any:
+        """
+        对输入添加扰动并返回
+        """
+        logger.info_on_rank_0(
+            f"[msprobe] Free benchmark: Perturbation is "
+            f"{PerturbationMode.ADD_NOISE} of {self.api_name}."
+        )
+        params.perturbed_value = self.add_noise(params.args[params.valid_input_index])
+        return self.perturbed_result(params)
+    def _get_noise(self, tensor_obj):
+        dtype = tensor_obj.dtype
+        device = str(tensor_obj.device)
+        noise = TorchC.full(
+            tensor_obj.shape,
+            self.perturbed_value,
+            device=device,
+            dtype=dtype,
+        )
+        return noise
+    def _check_details(self, tensor_obj):
+        """
+        判断是否需要添加扰动
+        """
+        if not self.perturbed_value:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.api_name}, "
+                f"dtype unsupported. Cancel perturbation."
+            )
+            return False
+        if tensor_obj.numel() == 0:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free benchmark: For {self.api_name}, tensor shape must > 0."
+                f" Cancel adding noise."
+            )
+            return False
+        abs_tol = ThresholdConfig.ABS_TOL_VALUE_DICT.get(
+            tensor_obj.dtype, ThresholdConfig.NOISE_INPUT_LOWER_BOUND
+        )
+        try:
+            max_val = TorchC.max(TorchC.abs(tensor_obj)).item()
+        except Exception:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.api_name}, "
+                f"when calculate maximun value, tensor is changed to float32."
+            )
+            max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item()
+        if max_val < abs_tol:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.api_name}, "
+                f"Maximun value is less than the  minimun threshold. Cancel add noise."
+            )
+            return False
+        return True

msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py ADDED Viewed

@@ -0,0 +1,104 @@
+import torch
+from msprobe.pytorch.free_benchmark import logger
+from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
+from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
+from msprobe.pytorch.free_benchmark.common.params import DataParams
+from msprobe.pytorch.free_benchmark.common.utils import TorchC
+from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import (
+    NpuBaseLayer,
+)
+class BitNoiseLayer(NpuBaseLayer):
+    def __init__(self, api_name):
+        super().__init__(api_name)
+        self.bit_mode = TorchC.bitwise_xor
+        self.bit_tail: int = 1
+        self.bit_type = None
+    def add_bit_noise(self, tensor_obj):
+        """
+        对输入添加噪声
+        """
+        # finfo应该列入黑名单
+        if isinstance(tensor_obj, torch.Tensor):
+            self._set_perturbation_bit(tensor_obj)
+            if not self.pre_check(tensor_obj):
+                return tensor_obj
+            sub_normal = torch.finfo(tensor_obj.dtype).smallest_normal
+            noise = TorchC.full(
+                tensor_obj.shape,
+                self.bit_tail,
+                device=tensor_obj.device,
+                dtype=self.bit_type,
+            )
+            result = tensor_obj.view(self.bit_type)
+            result = TorchC.where(
+                TorchC.gt(TorchC.abs(tensor_obj), sub_normal),
+                self.bit_mode(result, noise),
+                result,
+            ).view(tensor_obj.dtype)
+            self.is_added = True
+            return result
+        if isinstance(tensor_obj, dict):
+            return {key: self.add_bit_noise(value) for key, value in tensor_obj.items()}
+        if isinstance(tensor_obj, (tuple, list)):
+            return type(tensor_obj)([self.add_bit_noise(value) for value in tensor_obj])
+        return tensor_obj
+    def handle(self, params: DataParams) -> torch.Any:
+        """
+        对输入添加扰动并返回
+        """
+        logger.info_on_rank_0(
+            f"[msprobe] Free benchmark: Perturbation is "
+            f"{PerturbationMode.BIT_NOISE} of {self.api_name}."
+        )
+        params.perturbed_value = self.add_bit_noise(params.args[params.valid_input_index])
+        return self.perturbed_result(params)
+    def _check_details(self, tensor_obj):
+        """
+        判断是否需要添加扰动,  bit翻转
+        """
+        if not self.bit_type:
+            logger.info_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.api_name}, "
+                f"dtype unsupported. Cancel perturbation."
+            )
+            return False
+        if tensor_obj.numel() == 0:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free benchmark: For {self.api_name}, tensor shape must > 0"
+                f" Cancel adding noise."
+            )
+            return False
+        abs_tol = ThresholdConfig.ABS_TOL_VALUE_DICT.get(
+            tensor_obj.dtype, ThresholdConfig.NOISE_INPUT_LOWER_BOUND
+        )
+        try:
+            max_val = TorchC.max(TorchC.abs(tensor_obj)).item()
+        except Exception:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.api_name}, "
+                f"when calculate maximun value, tensor is changed to float32."
+            )
+            max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item()
+        if max_val < abs_tol:
+            logger.info_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.api_name}, "
+                f"Maximun value is less than the  minimun threshold. Cancel add noise."
+            )
+            return False
+        return True
+    def _set_perturbation_bit(self, tensor_obj):
+        """
+        根据不同浮点数确定不同位数扰动值
+        """
+        bit_len_type = ThresholdConfig.PERTURBATION_BIT_DICT.get(tensor_obj.dtype)
+        if bit_len_type:
+            self.bit_tail = 1
+            self.bit_type = bit_len_type

msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py ADDED Viewed

@@ -0,0 +1,63 @@
+import torch
+from msprobe.pytorch.free_benchmark import logger
+from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
+from msprobe.pytorch.free_benchmark.common.params import DataParams
+from msprobe.pytorch.free_benchmark.common.utils import TorchC
+from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import (
+    NpuBaseLayer,
+)
+class ChangeValueLayer(NpuBaseLayer):
+    def __init__(self, api_name):
+        super().__init__(api_name)
+        self.head: int = 0
+        self.tail: int = -1
+    def change_value(self, tensor_obj):
+        """
+        交换张量首尾
+        """
+        if isinstance(tensor_obj, torch.Tensor) and self.pre_check(tensor_obj):
+            new_tensor = TorchC.clone(tensor_obj)
+            if new_tensor.ndim == 1:
+                temp_first = TorchC.clone(new_tensor[self.head])
+                temp_last = TorchC.clone(new_tensor[self.tail])
+                new_tensor[self.head] = temp_last
+                new_tensor[self.tail] = temp_first
+            else:
+                temp_first = TorchC.clone(new_tensor[self.head][self.head])
+                temp_last = TorchC.clone(new_tensor[self.tail][self.tail])
+                new_tensor[self.head][self.head] = temp_last
+                new_tensor[self.tail][self.tail] = temp_first
+            self.is_added = True
+            return new_tensor
+        if isinstance(tensor_obj, dict):
+            return {key: self.change_value(value) for key, value in tensor_obj.items()}
+        if isinstance(tensor_obj, (tuple, list)):
+            return type(tensor_obj)([self.change_value(value) for value in tensor_obj])
+        return tensor_obj
+    def handle(self, params: DataParams) -> torch.Any:
+        """
+        对输入添加扰动并返回
+        """
+        logger.info_on_rank_0(
+            f"[msprobe] Free benchmark: Perturbation is "
+            f"{PerturbationMode.CHANGE_VALUE} of {self.api_name}."
+        )
+        params.perturbed_value = self.change_value(params.args[params.valid_input_index])
+        return self.perturbed_result(params)
+    def _check_details(self, tensor_obj):
+        """
+        判断是否需要添加扰动,  首尾值交换
+        """
+        if tensor_obj.size(0) < 2:
+            logger.info_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.api_name}, "
+                f"size 0 must greater than 1. Cancel change value."
+            )
+            return False
+        return True

msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py ADDED Viewed

@@ -0,0 +1,68 @@
+import torch
+from msprobe.core.common.const import Const
+from msprobe.pytorch.free_benchmark import logger
+from msprobe.pytorch.free_benchmark.common.constant import CommonField
+from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
+from msprobe.pytorch.free_benchmark.common.params import DataParams
+from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import (
+    NpuBaseLayer,
+)
+class ImprovePrecisionLayer(NpuBaseLayer):
+    def improve_tensor_precision(self, tensor_obj):
+        if (
+            isinstance(tensor_obj, torch.Tensor)
+            and torch.is_floating_point(tensor_obj)
+            and tensor_obj.dtype not in [torch.float32, torch.float64]
+        ):
+            self._set_improve_valus(tensor_obj)
+            tensor_obj = self._change_dtype(tensor_obj)
+            self.is_added = True
+            return tensor_obj
+        if isinstance(tensor_obj, dict):
+            return {
+                key: self.improve_tensor_precision(value)
+                for key, value in tensor_obj.items()
+            }
+        if isinstance(tensor_obj, (tuple, list)):
+            return type(tensor_obj)(
+                [self.improve_tensor_precision(value) for value in tensor_obj]
+            )
+        return tensor_obj
+    def handle(self, params: DataParams) -> torch.Any:
+        logger.info_on_rank_0(
+            f"[msprobe] Free benchmark: Perturbation is "
+            f"{PerturbationMode.IMPROVE_PRECISION} of {self.api_name}."
+        )
+        new_args = self.improve_tensor_precision(params.args)
+        if params.fuzz_stage == Const.BACKWARD:
+            new_kwargs = {}
+        else:
+            new_kwargs = self.improve_tensor_precision(params.kwargs)
+        # 如果输入中全为高精度、应跳过二次执行、减少多余显存引用
+        if not self.is_added:
+            return params.perturbed_result
+        if "inplace" in new_kwargs:
+            new_kwargs["inplace"] = False
+        params.perturbed_result = params.origin_func(*new_args, **new_kwargs)
+        return params.perturbed_result
+    def _set_improve_valus(self, inputs):
+        if inputs.dtype in [torch.float16, torch.bfloat16]:
+            self.perturbed_value = torch.float32
+    def _change_dtype(self, inputs):
+        if hasattr(inputs, CommonField.DEVICE):
+            device = inputs.device
+            if device is CommonField.META:
+                new_inputs = inputs.to(
+                    device=CommonField.META, dtype=self.perturbed_value
+                )
+            else:
+                new_inputs = inputs.to(dtype=self.perturbed_value).to(device)
+        else:
+            new_inputs = inputs.to(dtype=self.perturbed_value)
+        return new_inputs

msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py ADDED Viewed

@@ -0,0 +1,28 @@
+import torch
+from msprobe.pytorch.free_benchmark import logger
+from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
+from msprobe.pytorch.free_benchmark.common.params import DataParams
+from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import (
+    NpuBaseLayer,
+)
+class NoChangeLayer(NpuBaseLayer):
+    def no_change(self, tensor_obj):
+        """
+        不对输入做任何改变、直接二次执行
+        """
+        self.is_added = True
+        return tensor_obj
+    def handle(self, params: DataParams) -> torch.Any:
+        """
+        对输入添加扰动并返回
+        """
+        logger.info_on_rank_0(
+            f"[msprobe] Free benchmark: Perturbation is "
+            f"{PerturbationMode.NO_CHANGE} of {self.api_name}."
+        )
+        params.perturbed_value = self.no_change(params.args[params.valid_input_index])
+        return self.perturbed_result(params)

msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py ADDED Viewed

@@ -0,0 +1,45 @@
+from abc import abstractmethod
+from typing import Any
+import torch
+from msprobe.pytorch.free_benchmark.common.params import DataParams
+from msprobe.pytorch.free_benchmark.perturbed_layers.base_layer import BaseLayer
+class NpuBaseLayer(BaseLayer):
+    def __init__(self, api_name: str) -> None:
+        super().__init__(api_name)
+        self.perturbed_value = None  # 扰动的元素
+        self.is_added = False  # 标记当前算子输入是否调整
+    @staticmethod
+    def perturbed_result(params: DataParams) -> Any:
+        args_front = params.args[: params.valid_input_index]
+        args_rear = params.args[params.valid_input_index + 1:]
+        # 此处会将有inplace属性的算子换为非inplace
+        if "inplace" in params.kwargs:
+            params.kwargs["inplace"] = False
+        params.perturbed_result = params.origin_func(
+            *args_front, params.perturbed_value, *args_rear, **params.kwargs
+        )
+        return params.perturbed_result
+    @abstractmethod
+    def handle(self, params: DataParams) -> Any:
+        pass
+    def pre_check(self, tensor_obj):
+        """
+        检查张量是否符合标准(float类型且最大值大于对应精度最小值)
+        """
+        # 只针对第一个满足要求的添加扰动
+        if self.is_added:
+            return False
+        if not torch.is_floating_point(tensor_obj):
+            return False
+        if not self._check_details(tensor_obj):
+            return False
+        return True
+    def _check_details(self, tensor_obj):
+        return True

msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py ADDED Viewed

@@ -0,0 +1,19 @@
+import torch
+from msprobe.pytorch.free_benchmark import logger
+from msprobe.pytorch.free_benchmark.common.params import DataParams
+from msprobe.pytorch.free_benchmark.common.utils import Tools
+from msprobe.pytorch.free_benchmark.common.enums import DeviceType
+from msprobe.pytorch.free_benchmark.perturbed_layers.base_layer import BaseLayer
+class CpuLayer(BaseLayer):
+    def handle(self, params: DataParams) -> torch.Any:
+        logger.info_on_rank_0(
+            f"[msprobe] Free benchmark: Perturbation is to_cpu of {self.api_name}."
+        )
+        new_args = Tools.convert_device_and_dtype(params.args, DeviceType.CPU, change_dtype=True)
+        new_kwargs = Tools.convert_device_and_dtype(params.kwargs, DeviceType.CPU, change_dtype=True)
+        params.perturbed_result = params.origin_func(*new_args, **new_kwargs)
+        return params.perturbed_result

msprobe/pytorch/free_benchmark/result_handlers/__init__.py ADDED Viewed

File without changes

msprobe/pytorch/free_benchmark/result_handlers/base_handler.py ADDED Viewed

@@ -0,0 +1,203 @@
+import math
+from abc import ABC, abstractmethod
+from typing import Any, Optional, Tuple
+import torch
+from msprobe.core.common.const import Const
+from msprobe.pytorch.free_benchmark import logger
+from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
+from msprobe.pytorch.free_benchmark.common.enums import (
+    FuzzThreshold,
+    NormType,
+    PerturbationMode,
+)
+from msprobe.pytorch.free_benchmark.common.params import (
+    DataParams,
+    HandlerParams,
+    make_unequal_row,
+)
+from msprobe.pytorch.free_benchmark.common.utils import Tools, TorchC
+class FuzzHandler(ABC):
+    def __init__(self, params: HandlerParams) -> None:
+        self.params = params
+        self.unequal_rows = []
+    @staticmethod
+    def pre_process(origin_ouput, perturbed_output):
+        if (
+            isinstance(origin_ouput, tuple)
+            and hasattr(origin_ouput, "values")
+            and hasattr(origin_ouput, "indices")
+        ):
+            origin_ouput = origin_ouput.values
+            perturbed_output = perturbed_output.values
+        if hasattr(perturbed_output, "dtype"):
+            abs_tol = ThresholdConfig.ABS_TOL_VALUE_DICT.get(perturbed_output.dtype)
+        else:
+            abs_tol = FuzzThreshold.F32_THD.value
+        return (
+            origin_ouput.to(perturbed_output.dtype).to(perturbed_output.device),
+            perturbed_output,
+            abs_tol,
+        )
+    @staticmethod
+    def convert_overflow_ratio_to_consistent(ratio):
+        if math.isnan(ratio) or math.isinf(ratio):
+            return ThresholdConfig.COMP_CONSISTENT
+        return ratio
+    @abstractmethod
+    def get_threshold(self, dtype):
+        pass
+    @abstractmethod
+    def handle(self, data_params: DataParams) -> Any:
+        pass
+    def get_ratio_from_specific_norm(
+            self, origin_output, perturbed_output, norm_type, abs_tol
+    ):
+        if norm_type == NormType.ENDLESS_NORM:
+            return self.get_endless_norm(origin_output, perturbed_output, abs_tol)
+        return ThresholdConfig.COMP_CONSISTENT
+    def get_endless_norm(self, origin_output, perturbed_output, abs_tol):
+        ratio_tensor1 = TorchC.where(
+            TorchC.gt(TorchC.abs(perturbed_output), abs_tol),
+            TorchC.div(
+                TorchC.abs(origin_output),
+                TorchC.add(TorchC.abs(perturbed_output), abs_tol),
+            ),
+            1,
+        )
+        ratio_tensor2 = TorchC.where(
+            TorchC.gt(TorchC.abs(origin_output), abs_tol),
+            TorchC.div(
+                TorchC.abs(perturbed_output),
+                TorchC.add(TorchC.abs(origin_output), abs_tol),
+            ),
+            1,
+        )
+        norm1 = self.convert_overflow_ratio_to_consistent(
+            TorchC.max(ratio_tensor1).item()
+        )
+        norm2 = self.convert_overflow_ratio_to_consistent(
+            TorchC.max(ratio_tensor2).item()
+        )
+        norm3 = self.convert_overflow_ratio_to_consistent(
+            TorchC.min(ratio_tensor1).item()
+        )
+        if norm3 < 0:
+            ratio = ThresholdConfig.SYMBOL_FLIPPING
+        else:
+            ratio = max(norm1, norm2)
+        return ratio
+    def ratio_calculate(self, origin_output, perturbed_output, norm_type) -> float:
+        try:
+            origin_output, perturbed_output, abs_tol = self.pre_process(
+                origin_output, perturbed_output
+            )
+        except Exception as e:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.params.api_name}, "
+                f"when computing ratio,"
+                f" y1 or y2 dtype is not supported {e}"
+            )
+            return ThresholdConfig.COMP_NAN
+        if self.params.fuzz_stage == Const.BACKWARD:
+            abs_tol = ThresholdConfig.BACKWARD_OUTPUT_LOWER_BOUND
+        else:
+            abs_tol = abs_tol ** 0.5
+        return self.get_ratio_from_specific_norm(
+            origin_output, perturbed_output, norm_type, abs_tol
+        )
+    def npu_compare(
+            self, origin_output, perturbed_output
+    ) -> Tuple[bool, Optional[float]]:
+        if isinstance(perturbed_output, int):
+            return origin_output == perturbed_output, None
+        elif isinstance(perturbed_output, float):
+            if perturbed_output == 0:
+                origin_output += FuzzThreshold.F32_THD
+                perturbed_output += FuzzThreshold.F32_THD
+            return (
+                math.isclose(origin_output, perturbed_output),
+                origin_output / perturbed_output,
+            )
+        elif not isinstance(perturbed_output, torch.Tensor):
+            logger.warning_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.params.api_name} "
+                f"The compare for output type {type(perturbed_output)} is not supported"
+            )
+        threshold = self.get_threshold(Tools.get_first_tensor_dtype(origin_output))
+        ratio = self.ratio_calculate(
+            origin_output, perturbed_output, norm_type=NormType.ENDLESS_NORM
+        )
+        if ratio == ThresholdConfig.SYMBOL_FLIPPING:
+            is_consistent = False
+        else:
+            is_consistent = threshold >= ratio >= 1 / threshold
+        return is_consistent, ratio
+    def cmp_output_npu(self, data_params: DataParams):
+        npu_consistent = True
+        max_fuzz_ratio = 0
+        try:
+            if isinstance(data_params.original_result, torch.Tensor):
+                is_consistent, ratio = self.npu_compare(
+                    data_params.original_result, data_params.perturbed_result
+                )
+                npu_consistent = is_consistent
+                max_fuzz_ratio = (
+                    max_fuzz_ratio if ratio is None else max(max_fuzz_ratio, ratio)
+                )
+                data_params.is_consistent = is_consistent and data_params.is_consistent
+                if not is_consistent and data_params.grad_unequal_flag:
+                    self.unequal_rows.append(
+                        make_unequal_row(data_params, self.params, ratio=ratio)
+                    )
+            elif isinstance(data_params.original_result, (list, tuple)):
+                for index_, origin_item in enumerate(data_params.original_result):
+                    is_consistent, ratio = self.npu_compare(
+                        origin_item, data_params.perturbed_result[index_]
+                    )
+                    npu_consistent = npu_consistent and is_consistent
+                    max_fuzz_ratio = (
+                        max_fuzz_ratio if ratio is None else max(max_fuzz_ratio, ratio)
+                    )
+                    data_params.is_consistent = (
+                            is_consistent and data_params.is_consistent
+                    )
+                    if not is_consistent and data_params.grad_unequal_flag:
+                        self.unequal_rows.append(
+                            make_unequal_row(
+                                data_params, self.params, ratio=ratio, index=index_
+                            )
+                        )
+        except Exception as e:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.params.api_name}, "
+                f"when campare the result exception raise {e}"
+            )
+        return npu_consistent, max_fuzz_ratio
+    def get_unequal_rows(self):
+        return self.unequal_rows
+    def _get_default_threshold(self, dtype):
+        if self.params.pert_mode == PerturbationMode.NO_CHANGE:
+            threshold = ThresholdConfig.COMP_CONSISTENT
+        else:
+            threshold = ThresholdConfig.DTYPE_PER_THD.get(
+                dtype, ThresholdConfig.DTYPE_PER_THD.get(torch.float32)
+            )
+        return threshold

msprobe/pytorch/free_benchmark/result_handlers/check_handler.py ADDED Viewed

@@ -0,0 +1,39 @@
+from typing import Any
+from msprobe.pytorch.free_benchmark import logger
+from msprobe.pytorch.free_benchmark.common.enums import DeviceType
+from msprobe.pytorch.free_benchmark.common.params import DataParams, make_unequal_row
+from msprobe.pytorch.free_benchmark.common.utils import Tools
+from msprobe.pytorch.free_benchmark.compare.single_benchmark import SingleCompare
+from msprobe.pytorch.free_benchmark.result_handlers.base_handler import FuzzHandler
+class CheckerHandler(FuzzHandler):
+    def other_compare(self, data_params: DataParams) -> bool:
+        is_consistent = SingleCompare().compare_seq(
+                    data_params.original_result, data_params.perturbed_result
+                )
+        if not is_consistent:
+            self.unequal_rows.append(
+                make_unequal_row(data_params, self.params)
+            )
+    def get_threshold(self, dtype):
+        return self._get_default_threshold(dtype)
+    def handle(self, data_params: DataParams) -> Any:
+        if isinstance(data_params.perturbed_result, bool) or not Tools.is_float_tensor(
+            data_params.perturbed_result
+        ):
+            return data_params.original_result
+        try:
+            if self.params.fuzz_device == DeviceType.NPU:
+                self.cmp_output_npu(data_params)
+            else:
+                self.other_compare(data_params)
+        except Exception as e:
+            logger.warning_on_rank_0(
+                f"[msprobe] Free Benchmark: For {self.params.api_name}, "
+                f"when campare the result exception raise {e}"
+            )
+        return data_params.original_result