mindstudio-probe 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/METADATA +5 -1
- mindstudio_probe-1.0.3.dist-info/RECORD +272 -0
- msprobe/README.md +78 -23
- msprobe/__init__.py +1 -0
- msprobe/config/README.md +182 -40
- msprobe/config/config.json +22 -0
- msprobe/core/__init__.py +0 -0
- msprobe/{pytorch → core}/advisor/advisor.py +3 -3
- msprobe/{pytorch → core}/advisor/advisor_result.py +2 -2
- msprobe/core/common/const.py +82 -5
- msprobe/core/common/exceptions.py +30 -18
- msprobe/core/common/file_check.py +19 -1
- msprobe/core/common/log.py +15 -1
- msprobe/core/common/utils.py +130 -30
- msprobe/core/common_config.py +32 -19
- msprobe/core/compare/acc_compare.py +299 -0
- msprobe/core/compare/check.py +95 -0
- msprobe/core/compare/compare_cli.py +49 -0
- msprobe/core/compare/highlight.py +222 -0
- msprobe/core/compare/multiprocessing_compute.py +149 -0
- msprobe/{pytorch → core}/compare/npy_compare.py +55 -4
- msprobe/core/compare/utils.py +429 -0
- msprobe/core/data_dump/data_collector.py +39 -35
- msprobe/core/data_dump/data_processor/base.py +85 -37
- msprobe/core/data_dump/data_processor/factory.py +5 -7
- msprobe/core/data_dump/data_processor/mindspore_processor.py +198 -0
- msprobe/core/data_dump/data_processor/pytorch_processor.py +94 -51
- msprobe/core/data_dump/json_writer.py +11 -11
- msprobe/core/grad_probe/__init__.py +0 -0
- msprobe/core/grad_probe/constant.py +71 -0
- msprobe/core/grad_probe/grad_compare.py +175 -0
- msprobe/core/grad_probe/utils.py +52 -0
- msprobe/doc/grad_probe/grad_probe.md +207 -0
- msprobe/doc/grad_probe/img/image-1.png +0 -0
- msprobe/doc/grad_probe/img/image-2.png +0 -0
- msprobe/doc/grad_probe/img/image-3.png +0 -0
- msprobe/doc/grad_probe/img/image-4.png +0 -0
- msprobe/doc/grad_probe/img/image.png +0 -0
- msprobe/mindspore/api_accuracy_checker/__init__.py +0 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +246 -0
- msprobe/mindspore/api_accuracy_checker/api_info.py +69 -0
- msprobe/mindspore/api_accuracy_checker/api_runner.py +152 -0
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +197 -0
- msprobe/mindspore/api_accuracy_checker/compute_element.py +224 -0
- msprobe/mindspore/api_accuracy_checker/main.py +16 -0
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +114 -0
- msprobe/mindspore/api_accuracy_checker/utils.py +63 -0
- msprobe/mindspore/cell_processor.py +34 -0
- msprobe/mindspore/common/const.py +87 -0
- msprobe/mindspore/common/log.py +38 -0
- msprobe/mindspore/common/utils.py +57 -0
- msprobe/mindspore/compare/distributed_compare.py +75 -0
- msprobe/mindspore/compare/ms_compare.py +117 -0
- msprobe/mindspore/compare/ms_graph_compare.py +317 -0
- msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -0
- msprobe/mindspore/debugger/debugger_config.py +38 -15
- msprobe/mindspore/debugger/precision_debugger.py +79 -4
- msprobe/mindspore/doc/compare.md +58 -0
- msprobe/mindspore/doc/dump.md +158 -6
- msprobe/mindspore/dump/dump_tool_factory.py +19 -22
- msprobe/mindspore/dump/hook_cell/api_registry.py +104 -0
- msprobe/mindspore/dump/hook_cell/hook_cell.py +53 -0
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +925 -0
- msprobe/mindspore/dump/hook_cell/wrap_functional.py +91 -0
- msprobe/mindspore/dump/hook_cell/wrap_tensor.py +63 -0
- msprobe/mindspore/dump/jit_dump.py +56 -0
- msprobe/mindspore/dump/kernel_kbyk_dump.py +65 -0
- msprobe/mindspore/free_benchmark/__init__.py +0 -0
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +116 -0
- msprobe/mindspore/free_benchmark/common/__init__.py +0 -0
- msprobe/mindspore/free_benchmark/common/config.py +12 -0
- msprobe/mindspore/free_benchmark/common/handler_params.py +17 -0
- msprobe/mindspore/free_benchmark/common/utils.py +71 -0
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -0
- msprobe/mindspore/free_benchmark/decorator/__init__.py +0 -0
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +42 -0
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +107 -0
- msprobe/mindspore/free_benchmark/handler/__init__.py +0 -0
- msprobe/mindspore/free_benchmark/handler/base_handler.py +90 -0
- msprobe/mindspore/free_benchmark/handler/check_handler.py +41 -0
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +36 -0
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -0
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +67 -0
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +21 -0
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +63 -0
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +34 -0
- msprobe/mindspore/free_benchmark/perturbation/no_change.py +12 -0
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +27 -0
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +33 -0
- msprobe/mindspore/grad_probe/__init__.py +0 -0
- msprobe/mindspore/grad_probe/global_context.py +91 -0
- msprobe/mindspore/grad_probe/grad_analyzer.py +231 -0
- msprobe/mindspore/grad_probe/grad_monitor.py +27 -0
- msprobe/mindspore/grad_probe/grad_stat_csv.py +132 -0
- msprobe/mindspore/grad_probe/hook.py +92 -0
- msprobe/mindspore/grad_probe/utils.py +29 -0
- msprobe/mindspore/ms_config.py +63 -15
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +17 -15
- msprobe/mindspore/runtime.py +4 -0
- msprobe/mindspore/service.py +354 -0
- msprobe/mindspore/task_handler_factory.py +7 -4
- msprobe/msprobe.py +66 -26
- msprobe/pytorch/__init__.py +1 -1
- msprobe/pytorch/api_accuracy_checker/common/config.py +21 -16
- msprobe/pytorch/api_accuracy_checker/common/utils.py +1 -60
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +2 -5
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +46 -10
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +84 -48
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +8 -12
- msprobe/pytorch/api_accuracy_checker/config.yaml +7 -1
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +15 -11
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +11 -15
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +16 -9
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +193 -105
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +68 -1
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/__init__.py +0 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +202 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +324 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +204 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +218 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +10 -0
- msprobe/pytorch/bench_functions/__init__.py +15 -0
- msprobe/pytorch/bench_functions/apply_adam_w.py +28 -0
- msprobe/pytorch/bench_functions/confusion_transpose.py +19 -0
- msprobe/pytorch/bench_functions/fast_gelu.py +55 -0
- msprobe/pytorch/bench_functions/layer_norm_eval.py +6 -0
- msprobe/pytorch/bench_functions/linear.py +12 -0
- msprobe/pytorch/bench_functions/matmul_backward.py +48 -0
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +421 -0
- msprobe/pytorch/bench_functions/rms_norm.py +15 -0
- msprobe/pytorch/bench_functions/rotary_mul.py +52 -0
- msprobe/pytorch/bench_functions/scaled_mask_softmax.py +26 -0
- msprobe/pytorch/bench_functions/swiglu.py +55 -0
- msprobe/pytorch/common/parse_json.py +3 -1
- msprobe/pytorch/common/utils.py +83 -7
- msprobe/pytorch/compare/distributed_compare.py +19 -64
- msprobe/pytorch/compare/match.py +3 -6
- msprobe/pytorch/compare/pt_compare.py +40 -0
- msprobe/pytorch/debugger/debugger_config.py +11 -2
- msprobe/pytorch/debugger/precision_debugger.py +34 -4
- msprobe/pytorch/doc/api_accuracy_checker.md +57 -13
- msprobe/pytorch/doc/api_accuracy_checker_online.md +187 -0
- msprobe/pytorch/doc/dump.md +73 -20
- msprobe/pytorch/doc/ptdbg_ascend_compare.md +75 -11
- msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +3 -3
- msprobe/pytorch/doc/run_overflow_check.md +1 -1
- msprobe/pytorch/doc//321/206/320/247/320/260/321/206/320/260/320/227/321/206/320/255/320/226/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/205/320/254/342/225/221/321/206/320/251/320/277/321/211/320/272/320/234/321/210/320/277/320/221/321/205/320/242/320/234/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +151 -0
- msprobe/pytorch/free_benchmark/common/constant.py +3 -0
- msprobe/pytorch/free_benchmark/common/utils.py +4 -0
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +22 -26
- msprobe/pytorch/free_benchmark/main.py +7 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +1 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +1 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +3 -3
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +1 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +1 -1
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +43 -29
- msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +0 -1
- msprobe/pytorch/function_factory.py +75 -0
- msprobe/pytorch/functional/dump_module.py +4 -4
- msprobe/pytorch/grad_probe/__init__.py +0 -0
- msprobe/pytorch/grad_probe/grad_monitor.py +90 -0
- msprobe/pytorch/grad_probe/grad_stat_csv.py +129 -0
- msprobe/pytorch/hook_module/hook_module.py +14 -3
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +2 -1
- msprobe/pytorch/hook_module/utils.py +9 -9
- msprobe/pytorch/hook_module/wrap_aten.py +20 -10
- msprobe/pytorch/hook_module/wrap_distributed.py +10 -7
- msprobe/pytorch/hook_module/wrap_functional.py +4 -7
- msprobe/pytorch/hook_module/wrap_npu_custom.py +21 -10
- msprobe/pytorch/hook_module/wrap_tensor.py +5 -6
- msprobe/pytorch/hook_module/wrap_torch.py +5 -7
- msprobe/pytorch/hook_module/wrap_vf.py +6 -8
- msprobe/pytorch/module_processer.py +53 -13
- msprobe/pytorch/online_dispatch/compare.py +4 -4
- msprobe/pytorch/online_dispatch/dispatch.py +39 -41
- msprobe/pytorch/online_dispatch/dump_compare.py +17 -47
- msprobe/pytorch/online_dispatch/single_compare.py +5 -5
- msprobe/pytorch/online_dispatch/utils.py +2 -43
- msprobe/pytorch/parse_tool/lib/compare.py +31 -19
- msprobe/pytorch/parse_tool/lib/config.py +2 -1
- msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -4
- msprobe/pytorch/parse_tool/lib/utils.py +34 -80
- msprobe/pytorch/parse_tool/lib/visualization.py +4 -3
- msprobe/pytorch/pt_config.py +100 -6
- msprobe/pytorch/service.py +104 -19
- mindstudio_probe-1.0.1.dist-info/RECORD +0 -228
- msprobe/mindspore/dump/api_kbk_dump.py +0 -55
- msprobe/pytorch/compare/acc_compare.py +0 -1024
- msprobe/pytorch/compare/highlight.py +0 -100
- msprobe/test/core_ut/common/test_utils.py +0 -345
- msprobe/test/core_ut/data_dump/test_data_collector.py +0 -47
- msprobe/test/core_ut/data_dump/test_json_writer.py +0 -183
- msprobe/test/core_ut/data_dump/test_scope.py +0 -151
- msprobe/test/core_ut/test_common_config.py +0 -152
- msprobe/test/core_ut/test_file_check.py +0 -218
- msprobe/test/core_ut/test_log.py +0 -109
- msprobe/test/mindspore_ut/test_api_kbk_dump.py +0 -51
- msprobe/test/mindspore_ut/test_debugger_config.py +0 -42
- msprobe/test/mindspore_ut/test_dump_tool_factory.py +0 -51
- msprobe/test/mindspore_ut/test_kernel_graph_dump.py +0 -66
- msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py +0 -63
- msprobe/test/mindspore_ut/test_ms_config.py +0 -69
- msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py +0 -51
- msprobe/test/mindspore_ut/test_precision_debugger.py +0 -56
- msprobe/test/mindspore_ut/test_task_handler_factory.py +0 -58
- msprobe/test/pytorch_ut/advisor/test_advisor.py +0 -83
- msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +0 -108
- msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +0 -39
- msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +0 -112
- msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py +0 -77
- msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py +0 -125
- msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py +0 -10
- msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py +0 -43
- msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json +0 -179
- msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json +0 -63
- msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py +0 -99
- msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +0 -115
- msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py +0 -72
- msprobe/test/pytorch_ut/compare/test_acc_compare.py +0 -17
- msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py +0 -105
- msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py +0 -121
- msprobe/test/pytorch_ut/free_benchmark/test_main.py +0 -101
- msprobe/test/pytorch_ut/functional/test_dump_module.py +0 -15
- msprobe/test/pytorch_ut/hook_module/test_api_registry.py +0 -130
- msprobe/test/pytorch_ut/hook_module/test_hook_module.py +0 -42
- msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py +0 -65
- msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py +0 -35
- msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py +0 -20
- msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py +0 -35
- msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py +0 -43
- msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py +0 -11
- msprobe/test/pytorch_ut/test_pt_config.py +0 -69
- msprobe/test/pytorch_ut/test_service.py +0 -59
- msprobe/test/resources/advisor.txt +0 -3
- msprobe/test/resources/compare_result_20230703104808.csv +0 -9
- msprobe/test/resources/compare_result_without_accuracy.csv +0 -9
- msprobe/test/resources/config.yaml +0 -3
- msprobe/test/resources/npu_test.pkl +0 -8
- msprobe/test/run_test.sh +0 -30
- msprobe/test/run_ut.py +0 -58
- msprobe/test/test_module_processer.py +0 -64
- {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/WHEEL +0 -0
- {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/top_level.txt +0 -0
- /msprobe/{pytorch → core}/advisor/advisor_const.py +0 -0
- /msprobe/pytorch/doc/{atat → msprobe}/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/206/320/245/342/226/221/321/206/320/235/320/276dump/321/206/320/260/320/227/321/205/320/227/320/226/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md" +0 -0
msprobe/pytorch/module_processer.py:

```diff
@@ -1,15 +1,18 @@
 from functools import wraps
+
 import torch
 from torch.utils.hooks import BackwardHook
+
 from msprobe.core.common.const import Const
 from msprobe.core.data_dump.scope import ModuleRangeScope
+torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0'
 
 
 class ModuleProcesser:
+    module_count = {}
     module_stack = []
     api_parent_node = ""
     module_node = {}
-    current_module_name = ""
 
     def __init__(self, scope):
         if isinstance(scope, ModuleRangeScope):
@@ -19,15 +22,22 @@ class ModuleProcesser:
         BackwardHook.setup_input_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_input_hook)
         BackwardHook.setup_output_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_output_hook)
         BackwardHook.setup_output_hook = ModuleProcesser.filter_tensor_and_tuple(BackwardHook.setup_output_hook)
-        self.module_count = {}
 
     @staticmethod
     def filter_tensor_and_tuple(func):
         @wraps(func)
         def wrap_by_filter_tensor_and_tuple(*args, **kwargs):
-            # setup_output_hook传入非tensor数据,工具后续dump
+            # setup_output_hook传入非tensor数据,工具后续dump会报错,处理方式是解析非tensor数据的属性,对tensor属性挂hook
             # setup_output_hook定义为setup_output_hook(self, args),因此处理第二个位置参数,即*args[1]
             if not isinstance(args[1], (torch.Tensor, tuple)):
+                for item_str in dir(args[1]):
+                    item = getattr(args[1], item_str)
+                    # 处理tensor或者只包含tensor的元组
+                    if isinstance(item, torch.Tensor) or \
+                            (isinstance(item, tuple) and all(isinstance(x, torch.Tensor) for x in item)):
+                        args_new = (args[0], item)
+                        result = func(*args_new, **kwargs)
+                        setattr(args[1], item_str, result)
                 return args[1]
             return func(*args, **kwargs)
 
@@ -55,11 +65,26 @@ class ModuleProcesser:
         else:
             return result
 
+    @staticmethod
+    def module_count_func(module_name):
+        if module_name not in ModuleProcesser.module_count:
+            ModuleProcesser.module_count[module_name] = 0
+        else:
+            ModuleProcesser.module_count[module_name] += 1
+        return ModuleProcesser.module_count[module_name]
+
+    @classmethod
+    def reset_module_stats(cls):
+        cls.module_count = {}
+        cls.module_stack = []
+        cls.api_parent_node = ""
+        cls.module_node = {}
+
     def node_hook(self, name_prefix, start_or_stop, **kwargs):
 
         def pre_hook(module, input, output=None):
             try:
-                index =
+                index = ModuleProcesser.module_count_func(name_prefix)
             except IndexError as e:
                 index = None
                 pass
@@ -85,14 +110,29 @@ class ModuleProcesser:
             if self.scope:
                 self.scope.end_module(module.mindstudio_reserved_name)
 
-
-
-
-
+        def backward_hook(module, input, output=None):
+            try:
+                index = ModuleProcesser.module_count_func(name_prefix)
+            except IndexError as e:
+                index = None
+                pass
+            module.mindstudio_reserved_name = full_name = name_prefix + Const.SEP + str(index)
+            forward_full_name = full_name.replace(Const.BACKWARD, Const.FORWARD)
+            ModuleProcesser.module_node[full_name] = ModuleProcesser.module_node[forward_full_name].replace(
+                Const.FORWARD, Const.BACKWARD) if ModuleProcesser.module_node[forward_full_name] else None
+            ModuleProcesser.api_parent_node = None
+            if self.scope:
+                self.scope.begin_module(full_name)
 
-
-
-
+        if torch_version_above_or_equal_2:
+            if Const.START in start_or_stop:
+                return pre_hook
+            else:
+                return end_hook
         else:
-
-
+            if Const.FORWARD in name_prefix and Const.START in start_or_stop:
+                return pre_hook
+            elif Const.BACKWARD in name_prefix:
+                return backward_hook
+            else:
+                return end_hook
```
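The new `module_count_func` and `reset_module_stats` class methods keep a per-name call counter in class-level state, so repeated hooks on the same module name receive distinct indices (the index is appended to the dump name via `Const.SEP`). A minimal standalone sketch of that counting scheme, with no torch dependency; the class name `NameCounter` and the sample names are hypothetical, used only to illustrate the same logic:

```python
class NameCounter:
    """Per-name call counter mirroring the logic of ModuleProcesser.module_count_func."""
    counts = {}

    @staticmethod
    def next_index(name):
        # First call for a name yields 0; each later call increments by 1.
        if name not in NameCounter.counts:
            NameCounter.counts[name] = 0
        else:
            NameCounter.counts[name] += 1
        return NameCounter.counts[name]

    @classmethod
    def reset(cls):
        # Analogous to reset_module_stats: clear class-level state between runs.
        cls.counts = {}


if __name__ == "__main__":
    print(NameCounter.next_index("Module.Linear.forward"))  # 0
    print(NameCounter.next_index("Module.Linear.forward"))  # 1
    print(NameCounter.next_index("Module.Conv2d.forward"))  # 0
    NameCounter.reset()
    print(NameCounter.next_index("Module.Linear.forward"))  # 0 again
```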
msprobe/pytorch/online_dispatch/compare.py:

```diff
@@ -6,10 +6,9 @@ import json
 from collections import namedtuple
 from rich.table import Table
 from rich.console import Console
+from msprobe.core.common.const import CompareConst, FileCheckConst
+from msprobe.core.common.file_check import FileOpen, change_mode
 from .single_compare import single_benchmark_compare_wrap
-from .utils import DispatchException
-from msprobe.core.common.const import CompareConst
-from msprobe.core.common.file_check import FileOpen
 from msprobe.pytorch.common.log import logger
 from msprobe.core.common.utils import CompareException
 
@@ -42,6 +41,7 @@ def write_csv(data, filepath):
     with FileOpen(filepath, 'a', encoding='utf-8-sig') as f:
         writer = csv.writer(f)
         writer.writerows(data)
+    change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY)
 
 
 class Saver:
@@ -228,7 +228,7 @@ class Comparator:
         else:
             is_bwd_success, bwd_compare_alg_results = True, None
         if is_bwd_success and bwd_compare_alg_results is None:
-            self.saver.record_results(ResultInfo(api_name, is_fwd_success, CompareConst.
+            self.saver.record_results(ResultInfo(api_name, is_fwd_success, CompareConst.NAN, fwd_compare_alg_results,
                                                  bwd_compare_alg_results))
         else:
             self.saver.record_results(ResultInfo(api_name, is_fwd_success, is_bwd_success, fwd_compare_alg_results,
```
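The behavioural change in `write_csv` is that the result file's permissions are now tightened right after it is written. A rough standalone equivalent using only the standard library; the `0o640` mode and the function name are assumptions for illustration (msprobe's actual `FileCheckConst.DATA_FILE_AUTHORITY` value may differ):

```python
import csv
import os


def write_csv_with_mode(rows, filepath, mode=0o640):
    """Append rows to a CSV file, then restrict its permissions."""
    with open(filepath, "a", encoding="utf-8-sig", newline="") as f:
        csv.writer(f).writerows(rows)
    # Same idea as change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY).
    os.chmod(filepath, mode)


if __name__ == "__main__":
    write_csv_with_mode([["api", "status"], ["aten.add", "pass"]], "demo_result.csv")
```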
msprobe/pytorch/online_dispatch/dispatch.py:

```diff
@@ -4,7 +4,6 @@ import json
 from pathlib import Path
 from multiprocessing import Manager, Pool
 
-import yaml
 import torch
 
 from torch.utils._python_dispatch import TorchDispatchMode
@@ -16,14 +15,14 @@ except ImportError:
 else:
     is_npu = True
 
+from msprobe.core.common.utils import check_file_or_directory_path, check_path_before_create, load_yaml
+from msprobe.core.common.const import Const, CompareConst
+from msprobe.pytorch.common.log import logger
 from .dump_compare import dispatch_workflow, dispatch_multiprocess, error_call, TimeStatistics, \
     DispatchRunParam, DisPatchDataInfo
-from .utils import get_callstack, data_to_cpu,
-    DispatchException
+from .utils import get_callstack, data_to_cpu, get_sys_info, DispatchException, COMPARE_LOGO
 from .compare import Comparator
-
-from msprobe.core.common.utils import check_file_or_directory_path, check_path_before_create
-from msprobe.core.common.const import Const, CompareConst
+
 
 current_time = time.strftime("%Y%m%d%H%M%S")
 RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv"
@@ -33,12 +32,12 @@ DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv"
 class PtdbgDispatch(TorchDispatchMode):
     def __init__(self, dump_mode=Const.OFF, api_list=None, debug=False, dump_path=None, tag=None, process_num=0):
         super(PtdbgDispatch, self).__init__()
-
+        logger.info(COMPARE_LOGO)
         if not is_npu:
-
+            logger.error("Please confirm you run environment installed torch_npu!")
             return
         if dump_path is None:
-
+            logger.error("Please set dump_path when dump_mode is config!")
         check_file_or_directory_path(dump_path, True)
 
         self.device_id = torch_npu._C._npu_getDevice()
@@ -49,7 +48,7 @@ class PtdbgDispatch(TorchDispatchMode):
         self.single_api_index_dict = {}
         self.device_dump_path_cpu = None
         self.device_dump_path_npu = None
-        self.
+        self.all_summary = []
         self.call_stack_list = []
         self.process_num = process_num
         self.filter_dump_api()
@@ -70,13 +69,13 @@ class PtdbgDispatch(TorchDispatchMode):
         self.aten_ops_blacklist = []
         self.npu_adjust_autogard = []
         yaml_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "torch_ops_config.yaml")
-        self.
+        self.get_ops(yaml_path)
 
         self.lock = None
         if process_num > 0:
             self.pool = Pool(process_num)
         if debug:
-
+            logger.info(f'Main pid:{os.getpid()} device:{self.device_id} dump_list:{self.dump_api_list} '
                         f'dump_mode:{self.dump_mode} cpu_path[{self.root_cpu_path}], npu_path[{self.root_npu_path}], '
                         f'process[{process_num}]')
 
@@ -85,17 +84,17 @@ class PtdbgDispatch(TorchDispatchMode):
 
         if not is_npu:
             return
-
+        logger.info(f'start write compare csv: Rank[{self.device_id}], Pid[{os.getpid()}')
 
         if self.process_num > 0:
             self.pool.close()
             self.pool.join()
-
-        if not os.path.exists(
-
+        summary_path = os.path.join(self.root_cpu_path, f'summary.json')
+        if not os.path.exists(summary_path):
+            logger.error("Please check train log, An exception may have occurred!")
             return
-        check_file_or_directory_path(
-        fp_handle = open(
+        check_file_or_directory_path(summary_path, False)
+        fp_handle = open(summary_path, "r")
         while True:
             json_line_data = fp_handle.readline()
             if json_line_data == '\n':
@@ -103,7 +102,7 @@ class PtdbgDispatch(TorchDispatchMode):
             if len(json_line_data) == 0:
                 break
             msg = json.loads(json_line_data)
-            self.
+            self.all_summary[msg[0]] = msg[1]
         fp_handle.close()
 
         if self.debug_flag:
@@ -111,20 +110,20 @@ class PtdbgDispatch(TorchDispatchMode):
             output_num = 0
             total_num = 0
 
-            for list_data in self.
+            for list_data in self.all_summary:
                 for data in list_data:
-
+                    logger.info(f'summary: Device[{self.device_id}], Pid[{os.getpid()}], Data[{data}]')
                     if "_input" in data[CompareConst.NPU_NAME]:
                         input_num = input_num + 1
                     if "_output" in data[CompareConst.NPU_NAME]:
                         output_num = output_num + 1
                     total_num = total_num + 1
-
+            logger.info(f'Dispatch exit: Device[{self.device_id}], Pid[{os.getpid()} Input[{input_num}] '
                         f'Output[{output_num}] Total[{total_num}] API_Total[{self.api_index}]]')
 
     def __torch_dispatch__(self, func, types, args=(), kwargs=None):
         if not is_npu:
-
+            logger.error("Please confirm you run environment installed torch_npu!")
             return func(*args, **kwargs)
 
         func_name_split_list = func.__name__.split(".")
@@ -132,7 +131,7 @@ class PtdbgDispatch(TorchDispatchMode):
         try:
             aten_api_overload_name = func_name_split_list[1]
         except IndexError:
-
+            logger.error(f"Please check the func name {func.__name__}!")
             return func(*args, **kwargs)
 
         self.enable_autogard(aten_api)
@@ -151,7 +150,7 @@ class PtdbgDispatch(TorchDispatchMode):
         run_param = self.get_run_param(aten_api, func.__name__, aten_api_overload_name)
 
         if self.debug_flag:
-
+            logger.info(f'Dispatch Info: Rank[{self.device_id}], Pid[{os.getpid()}], Func[{func.__name__}], '
                         f'Name[{run_param.aten_api}_{run_param.single_api_index}], '
                         f'Count[{self.api_index}], Sys[{get_sys_info()}]')
 
@@ -175,21 +174,21 @@ class PtdbgDispatch(TorchDispatchMode):
             cpu_out = cpu_out.float()
 
         if self.process_num == 0:
-            self.
-            data_info = DisPatchDataInfo(cpu_args, cpu_kwargs, self.
+            self.all_summary.append([])
+            data_info = DisPatchDataInfo(cpu_args, cpu_kwargs, self.all_summary, func, npu_out_cpu, cpu_out, self.lock)
             dispatch_workflow(run_param, data_info)
         else:
             self.lock.acquire()
-            self.
+            self.all_summary.append([])
             self.lock.release()
             run_param.process_flag = True
            if self.check_fun(func, run_param):
-                data_info = DisPatchDataInfo(cpu_args, cpu_kwargs, self.
+                data_info = DisPatchDataInfo(cpu_args, cpu_kwargs, self.all_summary, None, npu_out_cpu, cpu_out,
                                              self.lock)
                 self.pool.apply_async(func=dispatch_multiprocess, args=(run_param, data_info),
                                       error_callback=error_call)
             else:
-
+                logger.error("can not get correct function please set process_num=0")
         return npu_out
 
     @staticmethod
@@ -208,17 +207,16 @@ class PtdbgDispatch(TorchDispatchMode):
             time.sleep(1)
         time_now = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
         if tag is None or not isinstance(tag, str):
-
+            logger.warning('There is not tag or the type of tag is not string.')
             dir_name = f'msprobe_rank{self.device_id}_{time_now}'
         else:
             dir_name = f'msprobe_{tag}_rank{self.device_id}_{time_now}'
         return dir_name
 
-    def
-
-
-
-        self.npu_adjust_autogard = yaml_file.get('npu_adjust_autogard')
+    def get_ops(self, file_path):
+        yaml_file = load_yaml(file_path)
+        self.aten_ops_blacklist = yaml_file.get('aten_ops_blacklist')
+        self.npu_adjust_autogard = yaml_file.get('npu_adjust_autogard')
 
     def filter_dump_api(self):
         if self.dump_mode != Const.LIST or not self.dump_api_list:
@@ -230,7 +228,7 @@ class PtdbgDispatch(TorchDispatchMode):
             if aten_api in aten_api_list:
                 dump_api_list.append(aten_api)
             else:
-
+                logger.warning(f'{aten_api} is not aten api will not dump, please refer to torch.ops.aten')
         self.dump_api_list = dump_api_list
 
     def get_run_param(self, aten_api, func_name, aten_api_overload_name):
@@ -257,16 +255,16 @@ class PtdbgDispatch(TorchDispatchMode):
 
     def check_param(self):
         if self.dump_mode not in Const.ONLINE_DUMP_MODE:
-
+            logger.error('The parameter "dump mode" can only be one of {}.'.format(Const.ONLINE_DUMP_MODE))
             raise DispatchException(DispatchException.INVALID_PARAMETER)
         if not isinstance(self.dump_api_list, list):
-
+            logger.error('The type of parameter "api_list" can only be list.')
             raise DispatchException(DispatchException.INVALID_PARAMETER)
         if not isinstance(self.debug_flag, bool):
-
+            logger.error('The type of parameter "debug" can only be bool.')
             raise DispatchException(DispatchException.INVALID_PARAMETER)
         if not isinstance(self.process_num, int) or self.process_num < 0:
-
+            logger.error('The type of parameter "process_num" can only be int and it should not be less than 0.')
             raise DispatchException(DispatchException.INVALID_PARAMETER)
 
     def enable_autogard(self, aten_api):
```
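For context, `PtdbgDispatch` is built on PyTorch's `torch.utils._python_dispatch.TorchDispatchMode`, which intercepts every aten call issued while the mode is active; the 1.0.3 changes above route its messages through the shared msprobe logger and load the op blacklist via `load_yaml`. A minimal, generic sketch of the interception mechanism itself, not msprobe code; the class name `LoggingDispatchMode` is illustrative:

```python
import torch
from torch.utils._python_dispatch import TorchDispatchMode


class LoggingDispatchMode(TorchDispatchMode):
    """Print every aten op executed under this mode, then run it unchanged."""

    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        kwargs = kwargs or {}
        print(f"dispatched: {func.__name__}")
        return func(*args, **kwargs)


if __name__ == "__main__":
    with LoggingDispatchMode():
        x = torch.ones(2, 2)
        y = x + x  # each underlying aten op is reported before it runs
    print(y.sum())
```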
msprobe/pytorch/online_dispatch/dump_compare.py:

```diff
@@ -5,11 +5,10 @@ from datetime import datetime, timezone
 
 import pandas as pd
 import torch
-from .utils import np_save_data, logger_debug, logger_error, logger_warn, logger_user, COLOR_RED, COLOR_GREEN, \
-    COLOR_RESET, CSV_COLUMN_NAME
-from msprobe.core.common.file_check import FileOpen, change_mode
-from msprobe.core.common.const import CompareConst, FileCheckConst, Const
 from msprobe.pytorch.common.log import logger
+from msprobe.core.common.file_check import FileOpen
+from .utils import np_save_data
+
 
 class DispatchRunParam:
     def __init__(self, debug_flag, device_id, root_npu_path, root_cpu_path, process_num, comparator):
@@ -32,10 +31,10 @@ class DispatchRunParam:
 
 
 class DisPatchDataInfo:
-    def __init__(self, cpu_args, cpu_kwargs,
+    def __init__(self, cpu_args, cpu_kwargs, all_summary, func, npu_out_cpu, cpu_out, lock):
         self.cpu_args = cpu_args
         self.cpu_kwargs = cpu_kwargs
-        self.
+        self.all_summary = all_summary
         self.func = func
         self.npu_out_cpu = npu_out_cpu
         self.cpu_out = cpu_out
@@ -57,7 +56,7 @@ class TimeStatistics:
     def __enter__(self):
         if self.debug:
             self.time = datetime.now(tz=timezone.utc)
-
+            logger.info(f'Time[{self.tag}]-ENTER: Dev[{self.device}], Pid[{os.getpid()}], Fun[{self.fun}], ' \
                         f'Id[{self.index}]')
 
     def __exit__(self, exc_type, exc_val, exc_tb):
@@ -68,9 +67,9 @@ class TimeStatistics:
             hot_time_cost = "Hotspot " + time_cost
 
             if cost_time.total_seconds() > self.timeout:
-
+                logger.info(hot_time_cost)
             else:
-
+                logger.info(time_cost)
 
 
 def support_basic_type(data):
@@ -87,24 +86,24 @@ def dump_data(data, prefix, dump_path):
     elif support_basic_type(data):
         if isinstance(data, torch.Tensor) and data.is_meta:
             return
-        # dump data may greater than
+        # dump data may greater than summary_list collect
         np_save_data(data, prefix, dump_path)
 
 
-def
-
+def save_temp_summary(api_index, single_api_summary, path, lock):
+    summary_path = os.path.join(path, f'summary.json')
     lock.acquire()
-    with FileOpen(
-        json.dump([api_index,
+    with FileOpen(summary_path, "a") as f:
+        json.dump([api_index, single_api_summary], f)
         f.write('\n')
     lock.release()
 
 
 def dispatch_workflow(run_param: DispatchRunParam, data_info: DisPatchDataInfo):
     cpu_args, cpu_kwargs = data_info.cpu_args, data_info.cpu_kwargs
-
+    all_summary, func = data_info.all_summary, data_info.func
     npu_out_cpu, cpu_out, lock = data_info.npu_out_cpu, data_info.cpu_out, data_info.lock
-
+    single_api_summary = []
 
     prefix_input = f'{run_param.aten_api}_{run_param.single_api_index}_input'
     prefix_output = f'{run_param.aten_api}_{run_param.single_api_index}_output'
@@ -127,9 +126,9 @@ def dispatch_workflow(run_param: DispatchRunParam, data_info: DisPatchDataInfo):
         dump_data(npu_out_cpu, prefix_output, run_param.root_npu_path)
 
     if run_param.process_num == 0:
-
+        all_summary[run_param.api_index - 1] = copy.deepcopy(single_api_summary)
     else:
-
+        save_temp_summary(run_param.api_index - 1, single_api_summary, run_param.root_cpu_path, lock)
 
 
 def get_torch_func(run_param):
@@ -155,32 +154,3 @@ def dispatch_multiprocess(run_param, dispatch_data_info):
 def error_call(err):
     logger.error(f'multiprocess {err}')
 
-
-def save_csv(all_summery, call_stack_list, csv_path):
-    df = pd.DataFrame(columns=CSV_COLUMN_NAME)
-
-    for index, list_data in enumerate(all_summery):
-        for data in list_data:
-            csv_row_data = {CompareConst.NPU_NAME: data[CompareConst.NPU_NAME],
-                            CompareConst.BENCH_NAME: data[CompareConst.BENCH_NAME],
-                            CompareConst.NPU_DTYPE: data[CompareConst.NPU_DTYPE],
-                            CompareConst.BENCH_DTYPE: data[CompareConst.BENCH_DTYPE],
-                            CompareConst.NPU_SHAPE: data[CompareConst.NPU_SHAPE],
-                            CompareConst.BENCH_SHAPE: data[CompareConst.BENCH_SHAPE],
-                            CompareConst.NPU_MAX: data[CompareConst.NPU_MAX],
-                            CompareConst.NPU_MIN: data[CompareConst.NPU_MIN],
-                            CompareConst.NPU_MEAN: data[CompareConst.NPU_MEAN],
-                            CompareConst.BENCH_MAX: data[CompareConst.BENCH_MAX],
-                            CompareConst.BENCH_MIN: data[CompareConst.BENCH_MIN],
-                            CompareConst.BENCH_MEAN: data[CompareConst.BENCH_MEAN],
-                            CompareConst.COSINE: data[CompareConst.COSINE],
-                            CompareConst.MAX_ABS_ERR: data[CompareConst.MAX_ABS_ERR],
-                            CompareConst.MAX_RELATIVE_ERR: data[CompareConst.MAX_RELATIVE_ERR],
-                            CompareConst.ACCURACY: data[CompareConst.ACCURACY],
-                            CompareConst.STACK: call_stack_list[index],
-                            CompareConst.ERROR_MESSAGE: data[CompareConst.ERROR_MESSAGE]}
-            row_df = pd.DataFrame.from_dict(csv_row_data, orient='index').T
-            df = pd.concat([df, row_df])
-
-    df.to_csv(csv_path, index=False)
-    change_mode(csv_path, FileCheckConst.DATA_FILE_AUTHORITY)
```
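The new `save_temp_summary`, together with the reader added in `PtdbgDispatch`, replaces the removed `save_csv` path with a simple JSON-lines protocol: each worker appends one `[api_index, summary]` line under a lock, and the parent later reads the lines back into a list indexed by `api_index`. A self-contained sketch of that round trip; the file name, dummy data, and helper names are illustrative, and the lock is omitted because the sketch runs single-threaded:

```python
import json
import os


def append_summary(path, api_index, summary):
    # One JSON array per line: [index, per-API summary].
    with open(path, "a", encoding="utf-8") as f:
        json.dump([api_index, summary], f)
        f.write("\n")


def load_summaries(path, total):
    all_summary = [[] for _ in range(total)]
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            msg = json.loads(line)
            all_summary[msg[0]] = msg[1]  # same shape as self.all_summary[msg[0]] = msg[1]
    return all_summary


if __name__ == "__main__":
    tmp = "summary_demo.json"
    if os.path.exists(tmp):
        os.remove(tmp)
    append_summary(tmp, 0, [{"op": "aten.add", "max_abs_err": 0.0}])
    append_summary(tmp, 1, [{"op": "aten.mul", "max_abs_err": 1e-6}])
    print(load_summaries(tmp, 2))
```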
msprobe/pytorch/online_dispatch/single_compare.py:

```diff
@@ -3,15 +3,15 @@ from functools import wraps
 import torch
 from prettytable import PrettyTable
 from collections import namedtuple
-from .
+from msprobe.pytorch.common.log import logger
 
 def func_log_wrapper():
     def _out_wrapper(func):
         @wraps(func)
         def _in_wrapper(*kargs, **kwargs):
-
+            logger.info(f"start to run: {func.__name__}")
             x = func(*kargs, **kwargs)
-
+            logger.info(f"end to run: {func.__name__}")
             return x
 
         return _in_wrapper
@@ -165,7 +165,7 @@ class SingleBenchmarkAccuracyCompare:
     def compute_binary_diff(cls, npu_out, bench_out):
         result = torch.equal(npu_out, bench_out)
         if result:
-
+            logger.info("二进制精度比对通过, 无需单标杆比对法验证")
         return SingleBenchmarkAccuracyResult(result=result, max_abs_diff=0, max_rel_diff=0, error_balance=0)
 
     @classmethod
@@ -301,7 +301,7 @@ class SingleBenchSummary:
         table.add_row(["max_rel_diff", self.max_rel_diff, self.error_thd])
         table.add_row(["max_rel_idx", self.max_rel_idx, "-"])
 
-
+        logger.info(table)
 
     def to_column_value(self):
         return [self.bench_dtype, self.npu_dtype, self.shape, self.error_balance,
```
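`func_log_wrapper` is a plain entry/exit logging decorator; the change only routes its output through the shared msprobe logger. A generic sketch of the same pattern using the standard `logging` module; the names `log_calls` and `add` are illustrative, not msprobe APIs:

```python
import logging
from functools import wraps

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def log_calls(func):
    """Log entry and exit of the wrapped function, mirroring func_log_wrapper."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        logger.info("start to run: %s", func.__name__)
        result = func(*args, **kwargs)
        logger.info("end to run: %s", func.__name__)
        return result
    return wrapper


@log_calls
def add(a, b):
    return a + b


if __name__ == "__main__":
    print(add(1, 2))
```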
msprobe/pytorch/online_dispatch/utils.py:

```diff
@@ -1,6 +1,5 @@
 import os
 import inspect
-import logging
 import psutil
 import torch
 import numpy as np
@@ -14,6 +13,7 @@ else:
 
 from msprobe.core.common.const import CompareConst, FileCheckConst
 from msprobe.core.common.file_check import change_mode
+from msprobe.core.common.log import logger
 
 cpu_device = torch._C.device("cpu")
 COLOR_RED = '\033[31m'
@@ -77,7 +77,7 @@ def np_save_data(data, file_name, data_path):
         np.save(dump_path, data)
         change_mode(dump_path, FileCheckConst.DATA_FILE_AUTHORITY)
     except Exception as e:
-
+        logger.error("save numpy failed, error: {}".format(e))
     finally:
         pass
 
@@ -124,47 +124,6 @@ def data_to_cpu(data, deep, data_cpu):
     return data
 
 
-def get_mp_logger():
-    logger = logging.getLogger(__name__)
-    if not logger.handlers:
-        logger.setLevel(logging.INFO)
-        handler = logging.StreamHandler()
-        formatter = logging.Formatter('%(asctime)s %(message)s')
-        logger.propagate = True
-        handler.setFormatter(formatter)
-        logger.addHandler(handler)
-    return logger.info
-
-
-def logger_debug(mesg):
-    logger = get_mp_logger()
-    logger(f'DEBUG ' + mesg)
-
-
-def logger_info(mesg):
-    logger = get_mp_logger()
-    logger(f'INFO ' + mesg)
-
-
-def logger_warn(mesg):
-    logger = get_mp_logger()
-    logger(f'{COLOR_YELLOW}WARNING {mesg} {COLOR_RESET}')
-
-
-def logger_error(mesg):
-    logger = get_mp_logger()
-    logger(f'{COLOR_RED}ERROR {mesg} {COLOR_RESET}')
-
-
-def logger_user(mesg):
-    logger = get_mp_logger()
-    logger(mesg)
-
-
-def logger_logo():
-    logger_user(f'{COLOR_CYAN}{COMPARE_LOGO} {COLOR_RESET}')
-
-
 def get_sys_info():
     mem = psutil.virtual_memory()
     cpu_percent = psutil.cpu_percent(interval=1)
```