mindstudio-probe 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (249)
  1. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/METADATA +5 -1
  2. mindstudio_probe-1.0.3.dist-info/RECORD +272 -0
  3. msprobe/README.md +78 -23
  4. msprobe/__init__.py +1 -0
  5. msprobe/config/README.md +182 -40
  6. msprobe/config/config.json +22 -0
  7. msprobe/core/__init__.py +0 -0
  8. msprobe/{pytorch → core}/advisor/advisor.py +3 -3
  9. msprobe/{pytorch → core}/advisor/advisor_result.py +2 -2
  10. msprobe/core/common/const.py +82 -5
  11. msprobe/core/common/exceptions.py +30 -18
  12. msprobe/core/common/file_check.py +19 -1
  13. msprobe/core/common/log.py +15 -1
  14. msprobe/core/common/utils.py +130 -30
  15. msprobe/core/common_config.py +32 -19
  16. msprobe/core/compare/acc_compare.py +299 -0
  17. msprobe/core/compare/check.py +95 -0
  18. msprobe/core/compare/compare_cli.py +49 -0
  19. msprobe/core/compare/highlight.py +222 -0
  20. msprobe/core/compare/multiprocessing_compute.py +149 -0
  21. msprobe/{pytorch → core}/compare/npy_compare.py +55 -4
  22. msprobe/core/compare/utils.py +429 -0
  23. msprobe/core/data_dump/data_collector.py +39 -35
  24. msprobe/core/data_dump/data_processor/base.py +85 -37
  25. msprobe/core/data_dump/data_processor/factory.py +5 -7
  26. msprobe/core/data_dump/data_processor/mindspore_processor.py +198 -0
  27. msprobe/core/data_dump/data_processor/pytorch_processor.py +94 -51
  28. msprobe/core/data_dump/json_writer.py +11 -11
  29. msprobe/core/grad_probe/__init__.py +0 -0
  30. msprobe/core/grad_probe/constant.py +71 -0
  31. msprobe/core/grad_probe/grad_compare.py +175 -0
  32. msprobe/core/grad_probe/utils.py +52 -0
  33. msprobe/doc/grad_probe/grad_probe.md +207 -0
  34. msprobe/doc/grad_probe/img/image-1.png +0 -0
  35. msprobe/doc/grad_probe/img/image-2.png +0 -0
  36. msprobe/doc/grad_probe/img/image-3.png +0 -0
  37. msprobe/doc/grad_probe/img/image-4.png +0 -0
  38. msprobe/doc/grad_probe/img/image.png +0 -0
  39. msprobe/mindspore/api_accuracy_checker/__init__.py +0 -0
  40. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +246 -0
  41. msprobe/mindspore/api_accuracy_checker/api_info.py +69 -0
  42. msprobe/mindspore/api_accuracy_checker/api_runner.py +152 -0
  43. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +197 -0
  44. msprobe/mindspore/api_accuracy_checker/compute_element.py +224 -0
  45. msprobe/mindspore/api_accuracy_checker/main.py +16 -0
  46. msprobe/mindspore/api_accuracy_checker/type_mapping.py +114 -0
  47. msprobe/mindspore/api_accuracy_checker/utils.py +63 -0
  48. msprobe/mindspore/cell_processor.py +34 -0
  49. msprobe/mindspore/common/const.py +87 -0
  50. msprobe/mindspore/common/log.py +38 -0
  51. msprobe/mindspore/common/utils.py +57 -0
  52. msprobe/mindspore/compare/distributed_compare.py +75 -0
  53. msprobe/mindspore/compare/ms_compare.py +117 -0
  54. msprobe/mindspore/compare/ms_graph_compare.py +317 -0
  55. msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -0
  56. msprobe/mindspore/debugger/debugger_config.py +38 -15
  57. msprobe/mindspore/debugger/precision_debugger.py +79 -4
  58. msprobe/mindspore/doc/compare.md +58 -0
  59. msprobe/mindspore/doc/dump.md +158 -6
  60. msprobe/mindspore/dump/dump_tool_factory.py +19 -22
  61. msprobe/mindspore/dump/hook_cell/api_registry.py +104 -0
  62. msprobe/mindspore/dump/hook_cell/hook_cell.py +53 -0
  63. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +925 -0
  64. msprobe/mindspore/dump/hook_cell/wrap_functional.py +91 -0
  65. msprobe/mindspore/dump/hook_cell/wrap_tensor.py +63 -0
  66. msprobe/mindspore/dump/jit_dump.py +56 -0
  67. msprobe/mindspore/dump/kernel_kbyk_dump.py +65 -0
  68. msprobe/mindspore/free_benchmark/__init__.py +0 -0
  69. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +116 -0
  70. msprobe/mindspore/free_benchmark/common/__init__.py +0 -0
  71. msprobe/mindspore/free_benchmark/common/config.py +12 -0
  72. msprobe/mindspore/free_benchmark/common/handler_params.py +17 -0
  73. msprobe/mindspore/free_benchmark/common/utils.py +71 -0
  74. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -0
  75. msprobe/mindspore/free_benchmark/decorator/__init__.py +0 -0
  76. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +42 -0
  77. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +107 -0
  78. msprobe/mindspore/free_benchmark/handler/__init__.py +0 -0
  79. msprobe/mindspore/free_benchmark/handler/base_handler.py +90 -0
  80. msprobe/mindspore/free_benchmark/handler/check_handler.py +41 -0
  81. msprobe/mindspore/free_benchmark/handler/fix_handler.py +36 -0
  82. msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -0
  83. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +67 -0
  84. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +21 -0
  85. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +63 -0
  86. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +34 -0
  87. msprobe/mindspore/free_benchmark/perturbation/no_change.py +12 -0
  88. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +27 -0
  89. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +33 -0
  90. msprobe/mindspore/grad_probe/__init__.py +0 -0
  91. msprobe/mindspore/grad_probe/global_context.py +91 -0
  92. msprobe/mindspore/grad_probe/grad_analyzer.py +231 -0
  93. msprobe/mindspore/grad_probe/grad_monitor.py +27 -0
  94. msprobe/mindspore/grad_probe/grad_stat_csv.py +132 -0
  95. msprobe/mindspore/grad_probe/hook.py +92 -0
  96. msprobe/mindspore/grad_probe/utils.py +29 -0
  97. msprobe/mindspore/ms_config.py +63 -15
  98. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +17 -15
  99. msprobe/mindspore/runtime.py +4 -0
  100. msprobe/mindspore/service.py +354 -0
  101. msprobe/mindspore/task_handler_factory.py +7 -4
  102. msprobe/msprobe.py +66 -26
  103. msprobe/pytorch/__init__.py +1 -1
  104. msprobe/pytorch/api_accuracy_checker/common/config.py +21 -16
  105. msprobe/pytorch/api_accuracy_checker/common/utils.py +1 -60
  106. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +2 -5
  107. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +46 -10
  108. msprobe/pytorch/api_accuracy_checker/compare/compare.py +84 -48
  109. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +8 -12
  110. msprobe/pytorch/api_accuracy_checker/config.yaml +7 -1
  111. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +15 -11
  112. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +11 -15
  113. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +16 -9
  114. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +193 -105
  115. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +68 -1
  116. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/__init__.py +0 -0
  117. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +202 -0
  118. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +324 -0
  119. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +204 -0
  120. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +218 -0
  121. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +10 -0
  122. msprobe/pytorch/bench_functions/__init__.py +15 -0
  123. msprobe/pytorch/bench_functions/apply_adam_w.py +28 -0
  124. msprobe/pytorch/bench_functions/confusion_transpose.py +19 -0
  125. msprobe/pytorch/bench_functions/fast_gelu.py +55 -0
  126. msprobe/pytorch/bench_functions/layer_norm_eval.py +6 -0
  127. msprobe/pytorch/bench_functions/linear.py +12 -0
  128. msprobe/pytorch/bench_functions/matmul_backward.py +48 -0
  129. msprobe/pytorch/bench_functions/npu_fusion_attention.py +421 -0
  130. msprobe/pytorch/bench_functions/rms_norm.py +15 -0
  131. msprobe/pytorch/bench_functions/rotary_mul.py +52 -0
  132. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +26 -0
  133. msprobe/pytorch/bench_functions/swiglu.py +55 -0
  134. msprobe/pytorch/common/parse_json.py +3 -1
  135. msprobe/pytorch/common/utils.py +83 -7
  136. msprobe/pytorch/compare/distributed_compare.py +19 -64
  137. msprobe/pytorch/compare/match.py +3 -6
  138. msprobe/pytorch/compare/pt_compare.py +40 -0
  139. msprobe/pytorch/debugger/debugger_config.py +11 -2
  140. msprobe/pytorch/debugger/precision_debugger.py +34 -4
  141. msprobe/pytorch/doc/api_accuracy_checker.md +57 -13
  142. msprobe/pytorch/doc/api_accuracy_checker_online.md +187 -0
  143. msprobe/pytorch/doc/dump.md +73 -20
  144. msprobe/pytorch/doc/ptdbg_ascend_compare.md +75 -11
  145. msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +3 -3
  146. msprobe/pytorch/doc/run_overflow_check.md +1 -1
  147. msprobe/pytorch/doc/无标杆工具场景验证和性能基线报告.md +151 -0
  148. msprobe/pytorch/free_benchmark/common/constant.py +3 -0
  149. msprobe/pytorch/free_benchmark/common/utils.py +4 -0
  150. msprobe/pytorch/free_benchmark/compare/grad_saver.py +22 -26
  151. msprobe/pytorch/free_benchmark/main.py +7 -4
  152. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +1 -1
  153. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +1 -1
  154. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
  155. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +3 -3
  156. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +1 -1
  157. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +1 -1
  158. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +43 -29
  159. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +0 -1
  160. msprobe/pytorch/function_factory.py +75 -0
  161. msprobe/pytorch/functional/dump_module.py +4 -4
  162. msprobe/pytorch/grad_probe/__init__.py +0 -0
  163. msprobe/pytorch/grad_probe/grad_monitor.py +90 -0
  164. msprobe/pytorch/grad_probe/grad_stat_csv.py +129 -0
  165. msprobe/pytorch/hook_module/hook_module.py +14 -3
  166. msprobe/pytorch/hook_module/support_wrap_ops.yaml +2 -1
  167. msprobe/pytorch/hook_module/utils.py +9 -9
  168. msprobe/pytorch/hook_module/wrap_aten.py +20 -10
  169. msprobe/pytorch/hook_module/wrap_distributed.py +10 -7
  170. msprobe/pytorch/hook_module/wrap_functional.py +4 -7
  171. msprobe/pytorch/hook_module/wrap_npu_custom.py +21 -10
  172. msprobe/pytorch/hook_module/wrap_tensor.py +5 -6
  173. msprobe/pytorch/hook_module/wrap_torch.py +5 -7
  174. msprobe/pytorch/hook_module/wrap_vf.py +6 -8
  175. msprobe/pytorch/module_processer.py +53 -13
  176. msprobe/pytorch/online_dispatch/compare.py +4 -4
  177. msprobe/pytorch/online_dispatch/dispatch.py +39 -41
  178. msprobe/pytorch/online_dispatch/dump_compare.py +17 -47
  179. msprobe/pytorch/online_dispatch/single_compare.py +5 -5
  180. msprobe/pytorch/online_dispatch/utils.py +2 -43
  181. msprobe/pytorch/parse_tool/lib/compare.py +31 -19
  182. msprobe/pytorch/parse_tool/lib/config.py +2 -1
  183. msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -4
  184. msprobe/pytorch/parse_tool/lib/utils.py +34 -80
  185. msprobe/pytorch/parse_tool/lib/visualization.py +4 -3
  186. msprobe/pytorch/pt_config.py +100 -6
  187. msprobe/pytorch/service.py +104 -19
  188. mindstudio_probe-1.0.1.dist-info/RECORD +0 -228
  189. msprobe/mindspore/dump/api_kbk_dump.py +0 -55
  190. msprobe/pytorch/compare/acc_compare.py +0 -1024
  191. msprobe/pytorch/compare/highlight.py +0 -100
  192. msprobe/test/core_ut/common/test_utils.py +0 -345
  193. msprobe/test/core_ut/data_dump/test_data_collector.py +0 -47
  194. msprobe/test/core_ut/data_dump/test_json_writer.py +0 -183
  195. msprobe/test/core_ut/data_dump/test_scope.py +0 -151
  196. msprobe/test/core_ut/test_common_config.py +0 -152
  197. msprobe/test/core_ut/test_file_check.py +0 -218
  198. msprobe/test/core_ut/test_log.py +0 -109
  199. msprobe/test/mindspore_ut/test_api_kbk_dump.py +0 -51
  200. msprobe/test/mindspore_ut/test_debugger_config.py +0 -42
  201. msprobe/test/mindspore_ut/test_dump_tool_factory.py +0 -51
  202. msprobe/test/mindspore_ut/test_kernel_graph_dump.py +0 -66
  203. msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py +0 -63
  204. msprobe/test/mindspore_ut/test_ms_config.py +0 -69
  205. msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py +0 -51
  206. msprobe/test/mindspore_ut/test_precision_debugger.py +0 -56
  207. msprobe/test/mindspore_ut/test_task_handler_factory.py +0 -58
  208. msprobe/test/pytorch_ut/advisor/test_advisor.py +0 -83
  209. msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +0 -108
  210. msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +0 -39
  211. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +0 -112
  212. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py +0 -77
  213. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py +0 -125
  214. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py +0 -10
  215. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py +0 -43
  216. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json +0 -179
  217. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json +0 -63
  218. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py +0 -99
  219. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +0 -115
  220. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py +0 -72
  221. msprobe/test/pytorch_ut/compare/test_acc_compare.py +0 -17
  222. msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py +0 -105
  223. msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py +0 -121
  224. msprobe/test/pytorch_ut/free_benchmark/test_main.py +0 -101
  225. msprobe/test/pytorch_ut/functional/test_dump_module.py +0 -15
  226. msprobe/test/pytorch_ut/hook_module/test_api_registry.py +0 -130
  227. msprobe/test/pytorch_ut/hook_module/test_hook_module.py +0 -42
  228. msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py +0 -65
  229. msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py +0 -35
  230. msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py +0 -20
  231. msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py +0 -35
  232. msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py +0 -43
  233. msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py +0 -11
  234. msprobe/test/pytorch_ut/test_pt_config.py +0 -69
  235. msprobe/test/pytorch_ut/test_service.py +0 -59
  236. msprobe/test/resources/advisor.txt +0 -3
  237. msprobe/test/resources/compare_result_20230703104808.csv +0 -9
  238. msprobe/test/resources/compare_result_without_accuracy.csv +0 -9
  239. msprobe/test/resources/config.yaml +0 -3
  240. msprobe/test/resources/npu_test.pkl +0 -8
  241. msprobe/test/run_test.sh +0 -30
  242. msprobe/test/run_ut.py +0 -58
  243. msprobe/test/test_module_processer.py +0 -64
  244. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/LICENSE +0 -0
  245. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/WHEEL +0 -0
  246. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/entry_points.txt +0 -0
  247. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.3.dist-info}/top_level.txt +0 -0
  248. /msprobe/{pytorch → core}/advisor/advisor_const.py +0 -0
  249. /msprobe/pytorch/doc/{atat → msprobe}/精度工具数据dump标准性能基线报告.md +0 -0
@@ -0,0 +1,90 @@
+ import os
+ from collections import defaultdict
+
+ import torch
+ if int(torch.__version__.split('.')[0]) >= 2:
+     from torch.optim.optimizer import register_optimizer_step_pre_hook
+ from msprobe.pytorch.grad_probe.grad_stat_csv import GradStatCsv
+ from msprobe.core.grad_probe.utils import check_numeral_list_ascend, data_in_list_target
+ from msprobe.core.grad_probe.constant import GradConst, level_adp
+ from msprobe.core.common.file_check import create_directory
+ from msprobe.core.common.log import logger
+ from msprobe.core.common.utils import remove_path, write_csv, save_npy
+ from msprobe.pytorch.common.utils import get_rank_id, print_rank_0, save_pt
+
+
+ class GradientMonitor:
+
+     def __init__(self, common_config, task_config):
+         level = task_config.grad_level
+         if level not in level_adp:
+             raise Exception(f"level is invalid, not in {level_adp.keys()}")
+         self._level_adp = level_adp[level]
+         self._param_list = task_config.param_list
+         self._target_ranks = common_config.rank
+         logger.info(f"target rank {self._target_ranks}")
+         self._target_step = common_config.step
+         logger.info(f"target step {self._target_step}")
+         self._bounds = task_config.bounds
+         check_numeral_list_ascend(self._bounds)
+         self._output_path = common_config.dump_path
+         if not os.path.exists(self._output_path):
+             create_directory(self._output_path)
+         else:
+             logger.warning(f"the files in {self._output_path} will be overwritten")
+         self._step = -1
+         self._param2name = defaultdict(str)
+
+     @property
+     def output_path(self):
+         return self._output_path
+
+     @staticmethod
+     def save_grad_direction(param_name, grad, save_path):
+         if not os.path.exists(save_path):
+             create_directory(save_path)
+         param_grad = grad.clone().detach()
+         is_positive = param_grad > 0
+         save_filepath = os.path.join(save_path, f"{param_name}.npy")
+         save_npy(is_positive.numpy(), save_filepath)
+
+     def monitor(self, model):
+         print_rank_0("> parameter names:")
+         for name, param in model.named_parameters():
+             self._param2name[param] = name
+             print_rank_0(f"\t{name}")
+         setattr(self, "_rank", get_rank_id())
+         if torch.distributed.is_initialized() and not data_in_list_target(getattr(self, "_rank"), self._target_ranks):
+             return
+         self._hook_optimizer()
+
+     def _hook_optimizer(self):
+         def optimizer_pre_step_hook(optimizer, args, kargs):
+             self._step += 1
+             if not data_in_list_target(self._step, self._target_step):
+                 return
+             output_lines = []
+             for param, param_name in self._param2name.items():
+                 if not data_in_list_target(param_name, self._param_list):
+                     continue
+                 grad = param.main_grad if hasattr(param, "main_grad") else param.grad
+                 if grad is None:
+                     logger.info(f"grad is None: {param_name}")
+                     continue
+                 grad_info = GradStatCsv.generate_csv_line(param_name, self._level_adp, grad, self._bounds)
+                 output_lines.append(grad_info)
+                 if self._level_adp["have_grad_direction"]:
+                     GradientMonitor.save_grad_direction(param_name, grad,
+                                                         f'{self._output_path}/rank{self._rank}/step{self._step}')
+             output_dirpath = os.path.join(self._output_path, f"rank{getattr(self, '_rank')}")
+             if not os.path.isdir(output_dirpath):
+                 create_directory(output_dirpath)
+             output_path = os.path.join(output_dirpath, f"grad_summary_{self._step}.csv")
+             if os.path.exists(output_path):
+                 logger.warning(f"{output_path} will be overwritten")
+                 remove_path(output_path)
+             header_result = GradStatCsv.generate_csv_header(self._level_adp, self._bounds)
+             output_lines.insert(0, header_result)
+             write_csv(output_lines, output_path)
+         if int(torch.__version__.split('.')[0]) >= 2:
+             register_optimizer_step_pre_hook(optimizer_pre_step_hook)
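The new grad_monitor.py above wires gradient statistics collection into the optimizer step. A minimal usage sketch, assuming a torch >= 2 environment and config objects exposing the attributes read in __init__ (grad_level, param_list, bounds, rank, step, dump_path); the concrete values below are illustrative, and in msprobe they come from the parsed config.json:

from types import SimpleNamespace
import torch.nn as nn
from msprobe.pytorch.grad_probe.grad_monitor import GradientMonitor

common_config = SimpleNamespace(rank=[0], step=[0, 1, 2], dump_path="./grad_dump")
task_config = SimpleNamespace(grad_level="L1",  # assumed to be a valid level_adp key
                              param_list=[], bounds=[-1.0, 0.0, 1.0])

model = nn.Linear(4, 4)  # stand-in for the real training model
monitor = GradientMonitor(common_config, task_config)
monitor.monitor(model)  # registers optimizer_pre_step_hook (torch >= 2 only)
# During training, each optimizer.step() on a targeted rank/step then writes
# <dump_path>/rank<rank>/grad_summary_<step>.csv before the parameters update.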
@@ -0,0 +1,129 @@
+ from abc import ABC, abstractmethod
+ from collections import namedtuple
+ import hashlib
+ import torch
+ from msprobe.core.grad_probe.constant import GradConst
+
+ CSV_header_input = namedtuple("CSV_header_input", ["bounds"])
+ CSV_content_input = namedtuple("CSV_content_input", ["grad", "bounds"])
+
+
+ class GradStatCsv:
+     csv = {}
+
+     @staticmethod
+     def generate_csv_header(level, bounds):
+         header = ["param_name"]
+         for key in level["header"]:
+             csv_header_input = CSV_header_input(bounds=bounds)
+             header.extend(GradStatCsv.csv[key].generate_csv_header(csv_header_input))
+         return header
+
+     @staticmethod
+     def generate_csv_line(param_name, level, grad, bounds):
+         line = [param_name]
+         for key in level["header"]:
+             csv_content_input = CSV_content_input(grad=grad, bounds=bounds)
+             line.extend(GradStatCsv.csv[key].generate_csv_content(csv_content_input))
+         return line
+
+
+ def register_csv_item(key, cls=None):
+     if cls is None:
+         # called without a class: return a decorator that registers it
+         return lambda cls: register_csv_item(key, cls)
+     GradStatCsv.csv[key] = cls
+     return cls
+
+
+ class CsvItem(ABC):
+     @abstractmethod
+     def generate_csv_header(csv_header_input):
+         pass
+
+     @abstractmethod
+     def generate_csv_content(csv_content_input):
+         pass
+
+
+ @register_csv_item(GradConst.MD5)
+ class CSV_md5(CsvItem):
+     def generate_csv_header(csv_header_input):
+         return ["MD5"]
+
+     def generate_csv_content(csv_content_input):
+         grad = csv_content_input.grad
+         tensor_bytes = grad.cpu().detach().float().numpy().tobytes()
+         md5_hash = hashlib.md5(tensor_bytes)
+         return [md5_hash.hexdigest()]
+
+
+ @register_csv_item(GradConst.DISTRIBUTION)
+ class CSV_distribution(CsvItem):
+     def generate_csv_header(csv_header_input):
+         bounds = csv_header_input.bounds
+         intervals = []
+         if bounds:
+             intervals.append(f"(-inf, {bounds[0]}]")
+             for i in range(1, len(bounds)):
+                 intervals.append(f"({bounds[i-1]}, {bounds[i]}]")
+         if intervals:
+             intervals.append(f"({bounds[-1]}, inf)")
+         intervals.append("=0")
+
+         return intervals
+
+     def generate_csv_content(csv_content_input):
+         grad = csv_content_input.grad
+         bounds = csv_content_input.bounds
+         grad = grad.cpu().detach()
+         if grad.dtype == torch.bfloat16:
+             grad = grad.to(torch.float32)
+         element_num = grad.numel()
+         grad_equal_0_num = (grad == 0).sum().item()
+         bound = torch.Tensor(bounds)
+         bucketsize_result = torch.bucketize(grad, bound)
+         interval_nums = [(bucketsize_result == i).sum().item() for i in range(len(bound) + 1)]
+         interval_nums.append(grad_equal_0_num)
+         return_list = [x / element_num if element_num != 0 else 0 for x in interval_nums]
+         return return_list
+
+
+ @register_csv_item(GradConst.MAX)
+ class CSV_max(CsvItem):
+     def generate_csv_header(csv_header_input):
+         return ["max"]
+
+     def generate_csv_content(csv_content_input):
+         grad = csv_content_input.grad
+         return [torch.max(grad).cpu().detach().float().numpy().tolist()]
+
+
+ @register_csv_item(GradConst.MIN)
+ class CSV_min(CsvItem):
+     def generate_csv_header(csv_header_input):
+         return ["min"]
+
+     def generate_csv_content(csv_content_input):
+         grad = csv_content_input.grad
+         return [torch.min(grad).cpu().detach().float().numpy().tolist()]
+
+
+ @register_csv_item(GradConst.NORM)
+ class CSV_norm(CsvItem):
+     def generate_csv_header(csv_header_input):
+         return ["norm"]
+
+     def generate_csv_content(csv_content_input):
+         grad = csv_content_input.grad
+         return [torch.norm(grad).cpu().detach().float().numpy().tolist()]
+
+
+ @register_csv_item(GradConst.SHAPE)
+ class CSV_shape(CsvItem):
+     def generate_csv_header(csv_header_input):
+         return ["shape"]
+
+     def generate_csv_content(csv_content_input):
+         grad = csv_content_input.grad
+         return [list(grad.shape)]
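GradStatCsv above is an open registry: each statistic is a CsvItem subclass keyed by register_csv_item, and a level's "header" list selects which columns are emitted. A short sketch of adding a custom column under that contract; the key "mean" is hypothetical and not part of GradConst:

import torch
from msprobe.pytorch.grad_probe.grad_stat_csv import CsvItem, GradStatCsv, register_csv_item

@register_csv_item("mean")  # hypothetical key; the real keys are GradConst members
class CSV_mean(CsvItem):
    def generate_csv_header(csv_header_input):  # no self: called on the class, matching the file's style
        return ["mean"]

    def generate_csv_content(csv_content_input):
        grad = csv_content_input.grad
        return [torch.mean(grad.cpu().detach().float()).item()]

# Any level whose "header" lists the key then picks the new column up:
line = GradStatCsv.generate_csv_line("weight", {"header": ["mean"]}, torch.ones(2, 2), [])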
@@ -17,10 +17,13 @@
 
  import functools
  import threading
+
  import torch
  import torch.nn as nn
  import torch.utils.hooks as full_hooks
+
  from msprobe.core.common.const import Const
+ torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0'
 
 
  class HOOKModule(nn.Module):
@@ -46,9 +49,13 @@ class HOOKModule(nn.Module):
          else:
              HOOKModule.module_count[self.prefix] += 1
              self.prefix = self.prefix + str(HOOKModule.module_count[self.prefix] - 1) + Const.SEP
-         forward_pre_hook, forward_hook, backward_hook = build_hook(self.prefix)
-         self.register_forward_pre_hook(forward_pre_hook, with_kwargs=True)
-         self.register_forward_hook(forward_hook, with_kwargs=True)
+         forward_pre_hook, forward_hook, backward_hook, _ = build_hook(self.prefix)
+         if torch_version_above_or_equal_2:
+             self.register_forward_pre_hook(forward_pre_hook, with_kwargs=True)
+             self.register_forward_hook(forward_hook, with_kwargs=True)
+         else:
+             self.register_forward_pre_hook(forward_pre_hook)
+             self.register_forward_hook(forward_hook)
          self.register_backward_hook(backward_hook)
 
      def __call__(self, *input, **kwargs):
@@ -61,6 +68,10 @@
          HOOKModule.inner_stop_hook[self.current_thread] = False
          return result
 
+     @classmethod
+     def reset_module_stats(cls):
+         cls.module_count = {}
+
      def _call_func(self, *input, **kwargs):
          full_backward_hooks, non_full_backward_hooks = [], []
          if len(self._backward_hooks) > 0:
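Because HOOKModule.module_count is class-level state, instance prefixes would keep incrementing across dump sessions; the new reset_module_stats classmethod gives callers a way to clear it. A one-line sketch of the intended call (exactly where the service invokes it is not shown in this hunk):

from msprobe.pytorch.hook_module.hook_module import HOOKModule

HOOKModule.reset_module_stats()  # the next wrapped call of each API is numbered from 0 again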
@@ -1873,4 +1873,5 @@ distributed:
   - reduce_scatter
   - _reduce_scatter_base
   - _all_gather_base
- - all_to_all_single
+ - all_to_all_single
+ - all_to_all
@@ -16,14 +16,14 @@
  """
 
  import os
- import yaml
+ from msprobe.core.common.utils import load_yaml
 
- from msprobe.core.common.file_check import FileOpen
 
- cur_path = os.path.dirname(os.path.realpath(__file__))
- yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
- with FileOpen(yaml_path, 'r') as f:
-     Ops = yaml.safe_load(f)
- WrapFunctionalOps = Ops.get('functional')
- WrapTensorOps = Ops.get('tensor')
- WrapTorchOps = Ops.get('torch')
+ def get_ops():
+     cur_path = os.path.dirname(os.path.realpath(__file__))
+     yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
+     ops = load_yaml(yaml_path)
+     wrap_functional = ops.get('functional')
+     wrap_tensor = ops.get('tensor')
+     wrap_torch = ops.get('torch')
+     return set(wrap_functional) | set(wrap_tensor) | set(wrap_torch)
@@ -18,18 +18,17 @@
  import os
  import torch
 
- import yaml
-
  from msprobe.pytorch.hook_module.hook_module import HOOKModule
  from msprobe.pytorch.common.utils import torch_device_guard
  from msprobe.core.common.const import Const
- from msprobe.core.common.file_check import FileOpen
-
+ from msprobe.core.common.utils import load_yaml
+ from msprobe.pytorch.function_factory import npu_custom_grad_functions
 
  cur_path = os.path.dirname(os.path.realpath(__file__))
  yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
- with FileOpen(yaml_path, 'r') as f:
-     WrapAtenOps = yaml.safe_load(f).get('aten')
+ ops = load_yaml(yaml_path)
+ wrap_aten_ops = ops.get('aten')
+ white_aten_ops = ops.get('white_aten_ops', [])
 
 
  aten_func = {}
@@ -38,9 +37,9 @@ for f in dir(torch.ops.aten):
 
 
  def get_aten_ops():
-     global WrapAtenOps
+     global wrap_aten_ops
      _all_aten_ops = dir(torch.ops.aten)
-     return set(WrapAtenOps) & set(_all_aten_ops)
+     return set(wrap_aten_ops) & set(_all_aten_ops)
 
 
  class HOOKAtenOP(object):
@@ -48,7 +47,7 @@ class HOOKAtenOP(object):
 
 
  class AtenOPTemplate(HOOKModule):
-     def __init__(self, op, hook):
+     def __init__(self, op, hook, need_hook=True):
          if isinstance(op, torch._ops.OpOverloadPacket):
              op_name_ = op._qualified_op_name.split("::")[-1]
          else:
@@ -58,10 +57,21 @@
              op_name_ = op_name_ + '.' + overload_name
          self.op = op
          self.prefix_op_name_ = "Aten" + Const.SEP + str(op_name_) + Const.SEP
-         super().__init__(hook)
+         self.need_hook = need_hook
+         if self.need_hook:
+             super().__init__(hook)
 
      @torch_device_guard
      def forward(self, *args, **kwargs):
+         if isinstance(self.op, str):
+             if self.op in npu_custom_grad_functions:
+                 return npu_custom_grad_functions[self.op](*args, **kwargs)
+             if self.op in white_aten_ops:
+                 return eval(f"torch.ops.aten.{self.op}")(*args, **kwargs)
+             if self.op not in aten_func:
+                 raise Exception(f"Skip op[{self.op}] accuracy check, because the op is neither "
+                                 f"in dir(torch.ops.aten) nor in the support yaml.")
+             return aten_func[self.op](*args, **kwargs)
          return self.op(*args, **kwargs)
 
 
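The need_hook=False path above turns AtenOPTemplate into a plain dispatcher: given an op recorded by name only, it replays it through npu_custom_grad_functions, the white_aten_ops allowlist, or the aten_func table, without registering any dump hooks. A hedged sketch, assuming the constructor accepts an op-name string (as the string branch in forward implies) and that the name resolves through one of those tables:

import torch
from msprobe.pytorch.hook_module.wrap_aten import AtenOPTemplate

op = AtenOPTemplate("relu", hook=None, need_hook=False)  # "relu" is illustrative
out = op.forward(torch.randn(4))  # resolved via npu_custom_grad_functions / white_aten_ops / aten_func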
 
@@ -18,18 +18,15 @@
  import os
  from functools import wraps
  import torch.distributed as dist
- import yaml
 
  from msprobe.pytorch.hook_module.hook_module import HOOKModule
  from msprobe.pytorch.common.utils import torch_device_guard
  from msprobe.core.common.const import Const
- from msprobe.core.common.file_check import FileOpen
+ from msprobe.core.common.utils import load_yaml
 
 
  cur_path = os.path.dirname(os.path.realpath(__file__))
  yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
- with FileOpen(yaml_path, 'r') as f:
-     WrapDistributedOps = yaml.safe_load(f).get('distributed')
 
 
  distributed_func = {}
@@ -38,9 +35,10 @@ for f in dir(dist):
 
 
  def get_distributed_ops():
-     global WrapDistributedOps
      _all_distributed_ops = dir(dist)
-     return set(WrapDistributedOps) & set(_all_distributed_ops)
+     yaml_data = load_yaml(yaml_path)
+     wrap_distributed_ops = yaml_data.get('distributed')
+     return set(wrap_distributed_ops) & set(_all_distributed_ops)
 
 
  class HOOKDistributedOP(object):
@@ -57,7 +55,12 @@ class DistributedOPTemplate(HOOKModule):
 
      @torch_device_guard
      def forward(self, *args, **kwargs):
-         return distributed_func.get(self.op_name_)(*args, **kwargs)
+         if kwargs.get("async_op") or self.op_name_ in ["isend", "irecv"]:
+             handle = distributed_func.get(self.op_name_)(*args, **kwargs)
+             handle.wait()
+             return handle
+         else:
+             return distributed_func.get(self.op_name_)(*args, **kwargs)
 
 
  def wrap_distributed_op(op_name, hook):
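The forward change above addresses asynchronous collectives: with async_op=True (and for isend/irecv, which are always asynchronous), the wrapped call now blocks on the returned work handle before handing it back, so dump hooks that fire afterwards observe completed tensors rather than buffers still in flight. The same contract in plain torch.distributed terms (a sketch; assumes an already-initialized process group):

import torch
import torch.distributed as dist

tensor = torch.ones(4)
handle = dist.all_reduce(tensor, async_op=True)  # returns a Work handle immediately
handle.wait()  # block until the collective finishes
# only now is `tensor` guaranteed to hold the reduced result, safe to dump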
@@ -16,15 +16,13 @@
  """
 
  import os
-
  import torch
- import yaml
 
  from msprobe.pytorch.hook_module.hook_module import HOOKModule
  from msprobe.pytorch.common.utils import torch_device_guard
  from msprobe.core.common.const import Const
  from msprobe.pytorch.common.log import logger
- from msprobe.core.common.file_check import FileOpen
+ from msprobe.core.common.utils import load_yaml
 
 
  def remove_dropout():
@@ -66,14 +64,13 @@ def remove_dropout():
 
  cur_path = os.path.dirname(os.path.realpath(__file__))
  yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
- with FileOpen(yaml_path, 'r') as f:
-     WrapFunctionalOps = yaml.safe_load(f).get('functional')
 
 
  def get_functional_ops():
-     global WrapFunctionalOps
+     yaml_data = load_yaml(yaml_path)
+     wrap_functional_ops = yaml_data.get('functional')
      _all_functional_ops = dir(torch.nn.functional)
-     return set(WrapFunctionalOps) & set(_all_functional_ops)
+     return set(wrap_functional_ops) & set(_all_functional_ops)
 
 
  TorchFunctions = {func: getattr(torch.nn.functional, func) for func in get_functional_ops()}
@@ -17,27 +17,33 @@
 
  import os
  import torch
- import torch_npu
- import yaml
 
  from msprobe.pytorch.hook_module.hook_module import HOOKModule
  from msprobe.pytorch.common.utils import torch_device_guard, torch_without_guard_version
  from msprobe.core.common.const import Const
- from msprobe.core.common.file_check import FileOpen
+ from msprobe.core.common.utils import load_yaml
+ from msprobe.pytorch.function_factory import npu_custom_functions
 
  cur_path = os.path.dirname(os.path.realpath(__file__))
  yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
- with FileOpen(yaml_path, 'r') as f:
-     WrapNpuOps = yaml.safe_load(f).get('torch_npu')
+
+
+ try:
+     import torch_npu
+ except ImportError:
+     is_gpu = True
+ else:
+     is_gpu = False
 
 
  def get_npu_ops():
-     global WrapNpuOps
      if torch_without_guard_version:
          _npu_ops = dir(torch.ops.npu)
      else:
          _npu_ops = dir(torch_npu._C._VariableFunctionsClass)
-     return set(WrapNpuOps) & set(_npu_ops)
+     yaml_data = load_yaml(yaml_path)
+     wrap_npu_ops = yaml_data.get('torch_npu')
+     return set(wrap_npu_ops) & set(_npu_ops)
 
 
  class HOOKNpuOP(object):
@@ -46,13 +52,19 @@ class HOOKNpuOP(object):
 
 
  class NpuOPTemplate(HOOKModule):
-     def __init__(self, op_name, hook):
+     def __init__(self, op_name, hook, need_hook=True):
          self.op_name_ = op_name
          self.prefix_op_name_ = "NPU" + Const.SEP + str(op_name) + Const.SEP
-         super().__init__(hook)
+         self.need_hook = need_hook
+         if need_hook:
+             super().__init__(hook)
 
      @torch_device_guard
      def forward(self, *args, **kwargs):
+         if not self.need_hook:
+             if self.op_name_ not in npu_custom_functions:
+                 raise Exception(f'There is no bench function {self.op_name_}')
+             return npu_custom_functions[self.op_name_](*args, **kwargs)
          if torch_without_guard_version:
              return getattr(torch.ops.npu, str(self.op_name_))(*args, **kwargs)
          else:
@@ -60,7 +72,6 @@ class NpuOPTemplate(HOOKModule):
 
 
  def wrap_npu_op(op_name, hook):
-
      def npu_op_template(*args, **kwargs):
          return NpuOPTemplate(op_name, hook)(*args, **kwargs)
 
@@ -18,23 +18,22 @@
  import os
 
  import torch
- import yaml
 
  from msprobe.pytorch.hook_module.hook_module import HOOKModule
  from msprobe.pytorch.common.utils import torch_device_guard, parameter_adapter
  from msprobe.core.common.const import Const
- from msprobe.core.common.file_check import FileOpen
+ from msprobe.core.common.utils import load_yaml
+
 
  cur_path = os.path.dirname(os.path.realpath(__file__))
  yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
- with FileOpen(yaml_path, 'r') as f:
-     WrapTensorOps = yaml.safe_load(f).get('tensor')
 
 
  def get_tensor_ops():
-     global WrapTensorOps
      _tensor_ops = dir(torch.Tensor)
-     return set(WrapTensorOps) & set(_tensor_ops)
+     yaml_data = load_yaml(yaml_path)
+     wrap_tensor_ops = yaml_data.get('tensor')
+     return set(wrap_tensor_ops) & set(_tensor_ops)
 
 
  TensorOps = {op: getattr(torch.Tensor, op) for op in get_tensor_ops()}
@@ -16,25 +16,23 @@
  """
 
  import os
-
  import torch
- import yaml
 
  from msprobe.pytorch.hook_module.hook_module import HOOKModule
  from msprobe.pytorch.common.utils import torch_device_guard
  from msprobe.core.common.const import Const
- from msprobe.core.common.file_check import FileOpen
+ from msprobe.core.common.utils import load_yaml
+
 
  cur_path = os.path.dirname(os.path.realpath(__file__))
  yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
- with FileOpen(yaml_path, 'r') as f:
-     WrapTorchOps = yaml.safe_load(f).get('torch')
 
 
  def get_torch_ops():
-     global WrapTorchOps
      _torch_ops = []
-     for operation in WrapTorchOps:
+     yaml_data = load_yaml(yaml_path)
+     wrap_torch_ops = yaml_data.get('torch')
+     for operation in wrap_torch_ops:
          if '.' in operation:
              operation_sub_module_name, operation_sub_op = operation.rsplit('.', 1)
              operation_sub_module = getattr(torch, operation_sub_module_name)
@@ -16,24 +16,22 @@
  """
 
  import os
-
  import torch
- import yaml
 
+ from msprobe.core.common.const import Const
+ from msprobe.core.common.utils import load_yaml
  from msprobe.pytorch.hook_module.hook_module import HOOKModule
- from msprobe.core.common.file_check import FileOpen
  from msprobe.pytorch.common.utils import torch_device_guard
- from msprobe.core.common.const import Const
+
 
  cur_path = os.path.dirname(os.path.realpath(__file__))
  yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
- with FileOpen(yaml_path, 'r') as f:
-     WrapVfOps = yaml.safe_load(f).get('_VF')
 
 
  def get_vf_ops():
-     global WrapVfOps
-     return WrapVfOps
+     yaml_data = load_yaml(yaml_path)
+     wrap_vf_ops = yaml_data.get('_VF')
+     return wrap_vf_ops
 
 
  class HOOKVfOP(object):