mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
  2. mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
  3. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
  4. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
  5. msprobe/README.md +39 -3
  6. msprobe/config.json +1 -3
  7. msprobe/core/advisor/advisor.py +8 -3
  8. msprobe/core/common/const.py +113 -13
  9. msprobe/core/common/exceptions.py +25 -3
  10. msprobe/core/common/file_utils.py +150 -26
  11. msprobe/core/common/inplace_op_checker.py +15 -0
  12. msprobe/core/common/log.py +27 -9
  13. msprobe/core/common/utils.py +182 -69
  14. msprobe/core/common_config.py +44 -15
  15. msprobe/core/compare/acc_compare.py +207 -142
  16. msprobe/core/compare/check.py +2 -5
  17. msprobe/core/compare/compare_cli.py +21 -4
  18. msprobe/core/compare/highlight.py +124 -55
  19. msprobe/core/compare/layer_mapping/__init__.py +19 -0
  20. msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
  21. msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
  22. msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
  23. msprobe/core/compare/npy_compare.py +52 -23
  24. msprobe/core/compare/utils.py +272 -247
  25. msprobe/core/data_dump/data_collector.py +13 -11
  26. msprobe/core/data_dump/data_processor/base.py +46 -16
  27. msprobe/core/data_dump/data_processor/mindspore_processor.py +4 -4
  28. msprobe/core/data_dump/data_processor/pytorch_processor.py +156 -59
  29. msprobe/core/data_dump/scope.py +113 -34
  30. msprobe/core/grad_probe/constant.py +27 -13
  31. msprobe/core/grad_probe/grad_compare.py +18 -1
  32. msprobe/core/grad_probe/utils.py +30 -2
  33. msprobe/core/overflow_check/abnormal_scene.py +185 -0
  34. msprobe/core/overflow_check/api_info.py +55 -0
  35. msprobe/core/overflow_check/checker.py +138 -0
  36. msprobe/core/overflow_check/filter.py +157 -0
  37. msprobe/core/overflow_check/ignore_rules.yaml +55 -0
  38. msprobe/core/overflow_check/level.py +22 -0
  39. msprobe/core/overflow_check/utils.py +28 -0
  40. msprobe/docs/01.installation.md +10 -0
  41. msprobe/docs/02.config_introduction.md +49 -22
  42. msprobe/docs/03.config_examples.md +2 -9
  43. msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
  44. msprobe/docs/05.data_dump_PyTorch.md +3 -1
  45. msprobe/docs/06.data_dump_MindSpore.md +157 -90
  46. msprobe/docs/07.accuracy_checker_PyTorch.md +12 -12
  47. msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
  48. msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
  49. msprobe/docs/10.accuracy_compare_PyTorch.md +19 -13
  50. msprobe/docs/11.accuracy_compare_MindSpore.md +104 -13
  51. msprobe/docs/12.overflow_check_PyTorch.md +1 -1
  52. msprobe/docs/13.overflow_check_MindSpore.md +6 -6
  53. msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
  54. msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
  55. msprobe/docs/17.grad_probe.md +5 -6
  56. msprobe/docs/19.monitor.md +468 -0
  57. msprobe/docs/20.monitor_performance_baseline.md +52 -0
  58. msprobe/docs/21.visualization_PyTorch.md +386 -0
  59. msprobe/docs/22.visualization_MindSpore.md +384 -0
  60. msprobe/docs/23.tool_function_introduction.md +28 -0
  61. msprobe/docs/FAQ.md +3 -0
  62. msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
  63. msprobe/docs/img/compare_result.png +0 -0
  64. msprobe/docs/img/monitor/cpu_info.png +0 -0
  65. msprobe/mindspore/__init__.py +15 -0
  66. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +113 -145
  67. msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
  68. msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
  69. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
  70. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
  71. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
  72. msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
  73. msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
  74. msprobe/mindspore/api_accuracy_checker/main.py +27 -3
  75. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
  76. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
  77. msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
  78. msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
  79. msprobe/mindspore/cell_processor.py +33 -12
  80. msprobe/mindspore/common/const.py +33 -13
  81. msprobe/mindspore/common/log.py +5 -9
  82. msprobe/mindspore/common/utils.py +43 -4
  83. msprobe/mindspore/compare/distributed_compare.py +22 -22
  84. msprobe/mindspore/compare/ms_compare.py +271 -248
  85. msprobe/mindspore/compare/ms_graph_compare.py +81 -47
  86. msprobe/mindspore/debugger/debugger_config.py +4 -1
  87. msprobe/mindspore/debugger/precision_debugger.py +7 -1
  88. msprobe/mindspore/dump/dump_tool_factory.py +3 -1
  89. msprobe/mindspore/dump/hook_cell/api_registry.py +12 -2
  90. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +13 -16
  91. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +25 -0
  92. msprobe/mindspore/dump/jit_dump.py +17 -5
  93. msprobe/mindspore/dump/kernel_graph_dump.py +2 -4
  94. msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
  95. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
  96. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
  97. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +145 -39
  98. msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
  99. msprobe/mindspore/free_benchmark/common/utils.py +19 -4
  100. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
  101. msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
  102. msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
  103. msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
  104. msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
  105. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
  106. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
  107. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +4 -4
  108. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
  109. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
  110. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
  111. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
  112. msprobe/mindspore/grad_probe/global_context.py +28 -8
  113. msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
  114. msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
  115. msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
  116. msprobe/mindspore/grad_probe/hook.py +24 -10
  117. msprobe/mindspore/grad_probe/utils.py +18 -5
  118. msprobe/mindspore/ms_config.py +22 -15
  119. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +2 -4
  120. msprobe/mindspore/runtime.py +15 -0
  121. msprobe/mindspore/service.py +36 -30
  122. msprobe/mindspore/task_handler_factory.py +15 -0
  123. msprobe/msprobe.py +24 -7
  124. msprobe/pytorch/__init__.py +3 -2
  125. msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
  126. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -4
  127. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
  128. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
  129. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
  130. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +6 -1
  131. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +19 -14
  132. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +13 -9
  133. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +77 -53
  134. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +15 -4
  135. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
  136. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
  137. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
  138. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
  139. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
  140. msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
  141. msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
  142. msprobe/pytorch/bench_functions/npu_fusion_attention.py +100 -6
  143. msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
  144. msprobe/pytorch/bench_functions/swiglu.py +10 -2
  145. msprobe/pytorch/common/parse_json.py +6 -6
  146. msprobe/pytorch/common/utils.py +56 -5
  147. msprobe/pytorch/compare/distributed_compare.py +8 -9
  148. msprobe/pytorch/compare/pt_compare.py +8 -6
  149. msprobe/pytorch/debugger/debugger_config.py +19 -15
  150. msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
  151. msprobe/pytorch/free_benchmark/common/constant.py +15 -0
  152. msprobe/pytorch/free_benchmark/common/counter.py +15 -0
  153. msprobe/pytorch/free_benchmark/common/enums.py +15 -0
  154. msprobe/pytorch/free_benchmark/common/params.py +8 -1
  155. msprobe/pytorch/free_benchmark/common/utils.py +26 -4
  156. msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -3
  157. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
  158. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
  159. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
  160. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
  161. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
  162. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +10 -0
  163. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
  164. msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
  165. msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
  166. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
  167. msprobe/pytorch/hook_module/wrap_functional.py +14 -12
  168. msprobe/pytorch/module_processer.py +2 -5
  169. msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
  170. msprobe/pytorch/monitor/anomaly_detect.py +340 -0
  171. msprobe/pytorch/monitor/distributed/__init__.py +0 -0
  172. msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
  173. msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
  174. msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
  175. msprobe/pytorch/monitor/features.py +108 -0
  176. msprobe/pytorch/monitor/module_hook.py +870 -0
  177. msprobe/pytorch/monitor/module_metric.py +193 -0
  178. msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
  179. msprobe/pytorch/monitor/optimizer_collect.py +295 -0
  180. msprobe/pytorch/monitor/unittest/__init__.py +0 -0
  181. msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
  182. msprobe/pytorch/monitor/utils.py +250 -0
  183. msprobe/pytorch/monitor/visualizer.py +59 -0
  184. msprobe/pytorch/online_dispatch/__init__.py +2 -3
  185. msprobe/pytorch/online_dispatch/compare.py +29 -38
  186. msprobe/pytorch/online_dispatch/dispatch.py +50 -25
  187. msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
  188. msprobe/pytorch/online_dispatch/single_compare.py +53 -32
  189. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
  190. msprobe/pytorch/online_dispatch/utils.py +49 -21
  191. msprobe/pytorch/parse_tool/lib/compare.py +12 -18
  192. msprobe/pytorch/parse_tool/lib/config.py +1 -1
  193. msprobe/pytorch/parse_tool/lib/parse_tool.py +1 -2
  194. msprobe/pytorch/parse_tool/lib/utils.py +16 -35
  195. msprobe/pytorch/parse_tool/lib/visualization.py +2 -0
  196. msprobe/pytorch/pt_config.py +31 -8
  197. msprobe/pytorch/service.py +15 -5
  198. msprobe/visualization/__init__.py +14 -0
  199. msprobe/visualization/builder/__init__.py +14 -0
  200. msprobe/visualization/builder/graph_builder.py +165 -0
  201. msprobe/visualization/builder/msprobe_adapter.py +205 -0
  202. msprobe/visualization/compare/__init__.py +14 -0
  203. msprobe/visualization/compare/graph_comparator.py +130 -0
  204. msprobe/visualization/compare/mode_adapter.py +211 -0
  205. msprobe/visualization/graph/__init__.py +14 -0
  206. msprobe/visualization/graph/base_node.py +124 -0
  207. msprobe/visualization/graph/graph.py +200 -0
  208. msprobe/visualization/graph/node_colors.py +95 -0
  209. msprobe/visualization/graph/node_op.py +39 -0
  210. msprobe/visualization/graph_service.py +214 -0
  211. msprobe/visualization/utils.py +232 -0
  212. mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
  213. msprobe/docs/04.acl_config_examples.md +0 -78
  214. msprobe/mindspore/compare/layer_mapping.py +0 -146
  215. msprobe/mindspore/compare/modify_mapping.py +0 -107
  216. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
  217. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
  218. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
  219. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
  220. /msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0

msprobe/pytorch/free_benchmark/compare/grad_saver.py

@@ -102,8 +102,13 @@ class GradSaver:
     def check_grad_input(self, origin_grad, new_grad_index):
         if self.perturbed_grad_input is None:
             raise FreeBenchmarkException(
-                FreeBenchmarkException.InvalidGrad,
-                f"grad not exists : {self.api_name}.",
+                FreeBenchmarkException.InvalidPerturbedOutput,
+                f"perturbed grad not exists for {self.api_name}.",
+            )
+        if len(self.perturbed_grad_input) <= new_grad_index:
+            raise FreeBenchmarkException(
+                FreeBenchmarkException.InvalidPerturbedOutput,
+                f"perturbed grad index {new_grad_index} is out of bounds for {self.api_name}.",
             )
         with torch.no_grad():
             perturbed_grad = self.perturbed_grad_input[new_grad_index].to(
@@ -111,7 +116,7 @@ class GradSaver:
             )
             if origin_grad.shape != perturbed_grad.shape:
                 raise FreeBenchmarkException(
-                    FreeBenchmarkException.InvalidGrad,
+                    FreeBenchmarkException.InvalidPerturbedOutput,
                     f"grad shapes are inconsistent. api:{self.handler_params.api_name}."
                     f"origin:{origin_grad.shape}, perturbation: {perturbed_grad.shape}",
                 )
@@ -164,6 +169,18 @@ class GradSaver:
         index_ = 0
         for object_ in inner_args:
             if object_ is CommonField.HOLD_PLACE:
+                if index_ >= len(inputs):
+                    err_msg = (
+                        f"[msprobe] Free benchmark: When getting input from vjp, "
+                        f" the input index ({index_}) is out of bounds ({len(inputs)})."
+                    )
+                    logger.error_log_with_exp(
+                        err_msg,
+                        FreeBenchmarkException(
+                            FreeBenchmarkException.InvalidGrad,
+                            error_info=err_msg,
+                        ),
+                    )
                 _real_input.append(inputs[index_])
                 index_ += 1
             else:

msprobe/pytorch/free_benchmark/compare/single_benchmark.py

@@ -16,6 +16,7 @@
 import math
 
 import torch
+from msprobe.core.common.utils import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.utils import TorchC
@@ -67,6 +68,7 @@ class SingleCompare:
             return False
         return True
 
+    @recursion_depth_decorator("FreeBenchmark: SingleCompare.compare_seq")
     def compare_seq(self, actual, golden):
         if isinstance(golden, torch.Tensor):
             return self.compare_tensor_seq(actual, golden)
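
The two hunks above pull `recursion_depth_decorator` in from `msprobe.core.common.utils` and apply it to `SingleCompare.compare_seq`; the perturbation layers below get the same treatment. The decorator itself is not shown in this diff, but the intent is a guard that tracks call depth and aborts cleanly instead of overflowing the interpreter stack when walking deeply nested inputs. A rough, hypothetical sketch (names, depth limit, and error type are assumptions, not the msprobe implementation):

    from functools import wraps

    def depth_guard(label, max_depth=50):
        """Hypothetical stand-in for a recursion-depth guard decorator."""
        def decorator(func):
            depth = {"current": 0}

            @wraps(func)
            def wrapper(*args, **kwargs):
                depth["current"] += 1
                try:
                    if depth["current"] > max_depth:
                        raise RecursionError(f"{label}: nesting deeper than {max_depth} levels")
                    return func(*args, **kwargs)
                finally:
                    depth["current"] -= 1
            return wrapper
        return decorator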

msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py

@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import torch
+from msprobe.core.common.utils import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
@@ -26,6 +27,7 @@ from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import
 
 class AddNoiseLayer(NpuBaseLayer):
 
+    @recursion_depth_decorator("FreeBenchmark: AddNoiseLayer.add_noise")
     def add_noise(self, tensor_obj):
         if isinstance(tensor_obj, torch.Tensor):
             self.perturbed_value = ThresholdConfig.PERTURBATION_VALUE_DICT.get(
@@ -99,7 +101,7 @@ class AddNoiseLayer(NpuBaseLayer):
         if max_val < abs_tol:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"Maximun value is less than the minimun threshold. Cancel add noise."
+                f"Maximun value is less than the minimun threshold. Cancel add noise."
             )
             return False
         return True

msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py

@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import torch
+from msprobe.core.common.utils import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
@@ -31,6 +32,7 @@ class BitNoiseLayer(NpuBaseLayer):
         self.bit_tail: int = 1
         self.bit_type = None
 
+    @recursion_depth_decorator("FreeBenchmark: BitNoiseLayer.add_bit_noise")
     def add_bit_noise(self, tensor_obj):
         """
         对输入添加噪声
@@ -79,14 +81,14 @@ class BitNoiseLayer(NpuBaseLayer):
         判断是否需要添加扰动, bit翻转
         """
         if not self.bit_type:
-            logger.info_on_rank_0(
+            logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
                 f"dtype unsupported. Cancel perturbation."
             )
             return False
         if tensor_obj.numel() == 0:
             logger.warning_on_rank_0(
-                f"[msprobe] Free benchmark: For {self.api_name}, tensor shape must > 0"
+                f"[msprobe] Free benchmark: For {self.api_name}, tensor shape must > 0."
                 f" Cancel adding noise."
             )
             return False
@@ -102,9 +104,9 @@ class BitNoiseLayer(NpuBaseLayer):
         )
         max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item()
         if max_val < abs_tol:
-            logger.info_on_rank_0(
+            logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"Maximun value is less than the minimun threshold. Cancel add noise."
+                f"Maximun value is less than the minimun threshold. Cancel add noise."
             )
             return False
         return True

msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py

@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import torch
+from msprobe.core.common.utils import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
 from msprobe.pytorch.free_benchmark.common.params import DataParams
@@ -29,6 +30,7 @@ class ChangeValueLayer(NpuBaseLayer):
         self.head: int = 0
         self.tail: int = -1
 
+    @recursion_depth_decorator("FreeBenchmark: ChangeValueLayer.change_value")
     def change_value(self, tensor_obj):
         """
         交换张量首尾

msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py

@@ -15,6 +15,7 @@
 
 import torch
 from msprobe.core.common.const import Const
+from msprobe.core.common.utils import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import CommonField
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
@@ -26,6 +27,9 @@ from msprobe.pytorch.free_benchmark.perturbed_layers.npu.npu_base_layser import
 
 class ImprovePrecisionLayer(NpuBaseLayer):
 
+    @recursion_depth_decorator(
+        "FreeBenchmark: ImprovePrecisionLayer.improve_tensor_precision"
+    )
     def improve_tensor_precision(self, tensor_obj):
         if (
             isinstance(tensor_obj, torch.Tensor)

msprobe/pytorch/free_benchmark/result_handlers/base_handler.py

@@ -20,6 +20,7 @@ from typing import Any, Optional, Tuple
 import numpy as np
 import torch
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import FreeBenchmarkException
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.enums import (
@@ -113,6 +114,14 @@ class FuzzHandler(ABC):
         origin_output_chunks, perturbed_output_chunks = (
             self.tensor_split_for_error_calculate(origin_output, perturbed_output)
         )
+        if len(origin_output_chunks) != len(perturbed_output_chunks):
+            err_msg = (
+                f"For {self.params.api_name}, the number of compare tensor chunks is different: "
+                f"{len(origin_output_chunks)} != {len(perturbed_output_chunks)}. please check!"
+            )
+            raise FreeBenchmarkException(
+                FreeBenchmarkException.OutputIndexError, err_msg
+            )
         norm1 = -np.inf
         norm2 = -np.inf
         norm3 = np.inf
@@ -189,6 +198,7 @@ class FuzzHandler(ABC):
                 f"[msprobe] Free Benchmark: For {self.params.api_name} "
                 f"The compare for output type {type(perturbed_output)} is not supported"
             )
+            return True, 1
 
         threshold = self.get_threshold(Tools.get_first_tensor_dtype(origin_output))
         ratio = self.ratio_calculate(

msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py

@@ -15,10 +15,11 @@
 
 from typing import Any
 
+from msprobe.core.common.exceptions import FreeBenchmarkException
+from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.params import DataParams
 from msprobe.pytorch.free_benchmark.common.utils import Tools
 from msprobe.pytorch.free_benchmark.result_handlers.base_handler import FuzzHandler
-from msprobe.pytorch.free_benchmark import logger
 
 
 class FixHandler(FuzzHandler):
@@ -31,9 +32,9 @@ class FixHandler(FuzzHandler):
             return Tools.convert_fuzz_output_to_origin(
                 data_params.original_result, data_params.perturbed_result
             )
-        except Exception as e:
-            logger.warning_on_rank_0(
+        except FreeBenchmarkException as e:
+            logger.warning(
                 f"[msprobe] Free Benchmark: For {self.params.api_name} "
-                f"Fix output failed. "
+                f"Fix output failed because of: \n{e}"
             )
-        return data_params.original_result
+        return data_params.original_result

msprobe/pytorch/grad_probe/grad_monitor.py

@@ -1,15 +1,31 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 from collections import defaultdict
 
 import torch
-if int(torch.__version__.split('.')[0]) >= 2:
-    from torch.optim.optimizer import register_optimizer_step_pre_hook
-from msprobe.pytorch.grad_probe.grad_stat_csv import GradStatCsv
-from msprobe.core.grad_probe.utils import check_numeral_list_ascend, data_in_list_target
+from msprobe.core.common.file_utils import remove_path, save_npy, write_csv, create_directory
 from msprobe.core.grad_probe.constant import level_adp
+from msprobe.core.grad_probe.utils import check_numeral_list_ascend, data_in_list_target
 from msprobe.pytorch.common.log import logger
-from msprobe.core.common.file_utils import remove_path, save_npy, write_csv, create_directory
 from msprobe.pytorch.common.utils import get_rank_id, print_rank_0
+from msprobe.pytorch.grad_probe.grad_stat_csv import GradStatCsv
+
+if int(torch.__version__.split('.')[0]) >= 2:
+    from torch.optim.optimizer import register_optimizer_step_pre_hook
 
 
 class GradientMonitor:
@@ -75,7 +91,7 @@ class GradientMonitor:
                 output_lines.append(grad_info)
                 if self._level_adp["have_grad_direction"]:
                     GradientMonitor.save_grad_direction(param_name, grad,
-                                        f'{self._output_path}/rank{self._rank}/step{self._step}')
+                                        f'{self._output_path}/rank{self._rank}/step{self._step}')
                 output_dirpath = os.path.join(self._output_path, f"rank{getattr(self, '_rank')}")
             if not os.path.isdir(output_dirpath):
                 create_directory(output_dirpath)
@@ -87,5 +103,6 @@ class GradientMonitor:
             output_lines.insert(0, header_result)
             write_csv(output_lines, output_path)
             logger.info(f"write grad data to {output_path}")
+
         if int(torch.__version__.split('.')[0]) >= 2:
             register_optimizer_step_pre_hook(optimizer_pre_step_hook)
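
The grad_monitor.py hunks add the license header, regroup the imports, and move the torch version check that gates `register_optimizer_step_pre_hook` below the msprobe imports; the guarded registration at the bottom of the file is unchanged. As a standalone illustration of that version-gated hook pattern (hypothetical hook body, not msprobe's monitor logic):

    import torch

    if int(torch.__version__.split('.')[0]) >= 2:
        from torch.optim.optimizer import register_optimizer_step_pre_hook

        def log_step(optimizer, args, kwargs):
            # Called before every optimizer.step() on any optimizer instance.
            print(f"about to step {type(optimizer).__name__}")

        handle = register_optimizer_step_pre_hook(log_step)
        # handle.remove() detaches the hook when monitoring is no longer needed.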

msprobe/pytorch/grad_probe/grad_stat_csv.py

@@ -1,11 +1,27 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from abc import ABC, abstractmethod
 from collections import namedtuple
 import hashlib
+from functools import wraps
 import torch
 from msprobe.core.grad_probe.constant import GradConst
 
-CSV_header_input = namedtuple("CSV_header_input", ["bounds"])
-CSV_content_input = namedtuple("CSV_content_input", ["grad", "bounds"])
+CsvHeaderInput = namedtuple("CsvHeaderInput", ["bounds"])
+CsvContentInput = namedtuple("CsvContentInput", ["grad", "bounds"])
 
 
 class GradStatCsv:
@@ -15,7 +31,7 @@ class GradStatCsv:
     def generate_csv_header(level, bounds):
         header = ["param_name"]
         for key in level["header"]:
-            csv_header_input = CSV_header_input(bounds=bounds)
+            csv_header_input = CsvHeaderInput(bounds=bounds)
             header.extend(GradStatCsv.csv[key].generate_csv_header(csv_header_input))
         return header
 
@@ -23,7 +39,7 @@
     def generate_csv_line(param_name, level, grad, bounds):
         line = [param_name]
         for key in level["header"]:
-            csv_content_input = CSV_content_input(grad=grad, bounds=bounds)
+            csv_content_input = CsvContentInput(grad=grad, bounds=bounds)
             line.extend(GradStatCsv.csv[key].generate_csv_content(csv_content_input))
         return line
 
@@ -37,20 +53,24 @@ def register_csv_item(key, cls=None):
 
 
 class CsvItem(ABC):
+    @staticmethod
     @abstractmethod
     def generate_csv_header(csv_header_input):
        pass
 
+    @staticmethod
    @abstractmethod
    def generate_csv_content(csv_content_input):
        pass
 
 
 @register_csv_item(GradConst.MD5)
-class CSV_md5(CsvItem):
+class CsvMd5(CsvItem):
+    @staticmethod
     def generate_csv_header(csv_header_input):
         return ["MD5"]
 
+    @staticmethod
     def generate_csv_content(csv_content_input):
         grad = csv_content_input.grad
         tensor_bytes = grad.cpu().detach().float().numpy().tobytes()
@@ -59,7 +79,8 @@ class CSV_md5(CsvItem):
 
 
 @register_csv_item(GradConst.DISTRIBUTION)
-class CSV_distribution(CsvItem):
+class CsvDistribution(CsvItem):
+    @staticmethod
     def generate_csv_header(csv_header_input):
         bounds = csv_header_input.bounds
         intervals = []
@@ -73,6 +94,7 @@ class CSV_distribution(CsvItem):
 
         return intervals
 
+    @staticmethod
     def generate_csv_content(csv_content_input):
         grad = csv_content_input.grad
         bounds = csv_content_input.bounds
@@ -90,40 +112,48 @@ class CSV_distribution(CsvItem):
 
 
 @register_csv_item(GradConst.MAX)
-class CSV_max(CsvItem):
+class CsvMax(CsvItem):
+    @staticmethod
     def generate_csv_header(csv_header_input):
         return ["max"]
 
+    @staticmethod
     def generate_csv_content(csv_content_input):
         grad = csv_content_input.grad
         return [torch.max(grad).cpu().detach().float().numpy().tolist()]
 
 
 @register_csv_item(GradConst.MIN)
-class CSV_min(CsvItem):
+class CsvMin(CsvItem):
+    @staticmethod
     def generate_csv_header(csv_header_input):
         return ["min"]
 
+    @staticmethod
     def generate_csv_content(csv_content_input):
         grad = csv_content_input.grad
         return [torch.min(grad).cpu().detach().float().numpy().tolist()]
 
 
 @register_csv_item(GradConst.NORM)
-class CSV_norm(CsvItem):
+class CsvNorm(CsvItem):
+    @staticmethod
     def generate_csv_header(csv_header_input):
         return ["norm"]
 
+    @staticmethod
     def generate_csv_content(csv_content_input):
         grad = csv_content_input.grad
         return [torch.norm(grad).cpu().detach().float().numpy().tolist()]
 
 
 @register_csv_item(GradConst.SHAPE)
-class CSV_shape(CsvItem):
+class CsvShape(CsvItem):
+    @staticmethod
     def generate_csv_header(csv_header_input):
         return ["shape"]
 
+    @staticmethod
     def generate_csv_content(csv_content_input):
         grad = csv_content_input.grad
         return [list(grad.shape)]
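
The grad_stat_csv.py changes are a rename-and-cleanup pass: the `CSV_*` classes become CapWords (`CsvMd5`, `CsvMax`, ...), the namedtuples follow suit, and the `generate_csv_header` / `generate_csv_content` methods gain `@staticmethod`, which matches how they are called through the `GradStatCsv.csv` registry rather than on instances. For readers unfamiliar with the pattern, a self-contained sketch of such a decorator-driven registry (hypothetical names, not the msprobe code):

    REGISTRY = {}

    def register_item(key):
        # Class decorator: file the class under `key` for later key-based dispatch.
        def decorator(cls):
            REGISTRY[key] = cls
            return cls
        return decorator

    @register_item("max")
    class MaxItem:
        @staticmethod
        def generate_csv_header(_):
            return ["max"]

    # Dispatch by key, mirroring GradStatCsv.csv[key].generate_csv_header(...)
    print(REGISTRY["max"].generate_csv_header(None))  # ['max']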

msprobe/pytorch/hook_module/support_wrap_ops.yaml

@@ -1130,6 +1130,7 @@ torch_npu:
   - npu_prompt_flash_attention
   - npu_lstm
   - npu_apply_adam
+  - npu_apply_adam_w
 
 aten:
   - signbit

msprobe/pytorch/hook_module/wrap_functional.py

@@ -30,32 +30,34 @@ def remove_dropout():
     from torch import _VF
     from torch.overrides import has_torch_function_unary, handle_torch_function
 
-    def function_dropout(input: torch.Tensor, p: float = 0.5, training: bool = True,
+    def function_dropout(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
                          inplace: bool = False) -> torch.Tensor:
-        if has_torch_function_unary(input):
+        if has_torch_function_unary(input_tensor):
             return handle_torch_function(
-                function_dropout, (input,), input, p=0., training=training, inplace=inplace)
+                function_dropout, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
         if p < 0.0 or p > 1.0:
             raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
-        return _VF.dropout_(input, 0., training) if inplace else _VF.dropout(input, 0., training)
+        return _VF.dropout_(input_tensor, 0., training) if inplace else _VF.dropout(input_tensor, 0., training)
 
-    def function_dropout2d(input: torch.Tensor, p: float = 0.5, training: bool = True,
+    def function_dropout2d(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
                            inplace: bool = False) -> torch.Tensor:
-        if has_torch_function_unary(input):
+        if has_torch_function_unary(input_tensor):
             return handle_torch_function(
-                function_dropout2d, (input,), input, p=0., training=training, inplace=inplace)
+                function_dropout2d, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
         if p < 0.0 or p > 1.0:
             raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
-        return _VF.feature_dropout_(input, 0., training) if inplace else _VF.feature_dropout(input, 0., training)
+        return _VF.feature_dropout_(input_tensor, 0., training) if inplace else _VF.feature_dropout(input_tensor,
+                                                                                                    0., training)
 
-    def function_dropout3d(input: torch.Tensor, p: float = 0.5, training: bool = True,
+    def function_dropout3d(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
                            inplace: bool = False) -> torch.Tensor:
-        if has_torch_function_unary(input):
+        if has_torch_function_unary(input_tensor):
             return handle_torch_function(
-                function_dropout3d, (input,), input, p=0., training=training, inplace=inplace)
+                function_dropout3d, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
         if p < 0.0 or p > 1.0:
             raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
-        return _VF.feature_dropout_(input, 0., training) if inplace else _VF.feature_dropout(input, 0., training)
+        return _VF.feature_dropout_(input_tensor, 0., training) if inplace else _VF.feature_dropout(input_tensor,
+                                                                                                    0., training)
 
     F.dropout = function_dropout
     F.dropout2d = function_dropout2d
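
The wrap_functional.py hunk above is a rename only (the shadowing built-in `input` becomes `input_tensor`, and two long return statements are re-wrapped); the behaviour of `remove_dropout` is unchanged: it monkey-patches `torch.nn.functional` so the dropout family runs with p=0, presumably to keep dumped data unaffected by dropout randomness. A minimal standalone illustration of that effect (a sketch of the patching idea, not msprobe's API):

    import torch
    import torch.nn.functional as F

    def disable_dropout():
        # Re-bind F.dropout to a wrapper that forces p=0, i.e. an identity op.
        original_dropout = F.dropout

        def dropout_noop(x, p=0.5, training=True, inplace=False):
            return original_dropout(x, 0.0, training, inplace)

        F.dropout = dropout_noop

    disable_dropout()
    x = torch.ones(4)
    assert torch.equal(F.dropout(x, p=0.9, training=True), x)  # nothing is zeroed or rescaled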

msprobe/pytorch/module_processer.py

@@ -17,7 +17,7 @@ from functools import wraps
 
 import torch
 from msprobe.core.common.const import Const
-from msprobe.core.data_dump.scope import ModuleRangeScope
+from msprobe.core.data_dump.scope import ModuleRangeScope, MixRangeScope
 from torch.utils.hooks import BackwardHook
 
 torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0'
@@ -30,10 +30,7 @@ class ModuleProcesser:
     module_node = {}
 
     def __init__(self, scope):
-        if isinstance(scope, ModuleRangeScope):
-            self.scope = scope
-        else:
-            self.scope = None
+        self.scope = scope if isinstance(scope, (ModuleRangeScope, MixRangeScope)) else None
         BackwardHook.setup_input_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_input_hook)
         BackwardHook.setup_output_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_output_hook)
         BackwardHook.setup_output_hook = ModuleProcesser.filter_tensor_and_tuple(BackwardHook.setup_output_hook)