PyPI - mindstudio-probe - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220)

{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
msprobe/README.md +39 -3
msprobe/config.json +1 -3
msprobe/core/advisor/advisor.py +8 -3
msprobe/core/common/const.py +113 -13
msprobe/core/common/exceptions.py +25 -3
msprobe/core/common/file_utils.py +150 -26
msprobe/core/common/inplace_op_checker.py +15 -0
msprobe/core/common/log.py +27 -9
msprobe/core/common/utils.py +182 -69
msprobe/core/common_config.py +44 -15
msprobe/core/compare/acc_compare.py +207 -142
msprobe/core/compare/check.py +2 -5
msprobe/core/compare/compare_cli.py +21 -4
msprobe/core/compare/highlight.py +124 -55
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
msprobe/core/compare/npy_compare.py +52 -23
msprobe/core/compare/utils.py +272 -247
msprobe/core/data_dump/data_collector.py +13 -11
msprobe/core/data_dump/data_processor/base.py +46 -16
msprobe/core/data_dump/data_processor/mindspore_processor.py +4 -4
msprobe/core/data_dump/data_processor/pytorch_processor.py +156 -59
msprobe/core/data_dump/scope.py +113 -34
msprobe/core/grad_probe/constant.py +27 -13
msprobe/core/grad_probe/grad_compare.py +18 -1
msprobe/core/grad_probe/utils.py +30 -2
msprobe/core/overflow_check/abnormal_scene.py +185 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +10 -0
msprobe/docs/02.config_introduction.md +49 -22
msprobe/docs/03.config_examples.md +2 -9
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +3 -1
msprobe/docs/06.data_dump_MindSpore.md +157 -90
msprobe/docs/07.accuracy_checker_PyTorch.md +12 -12
msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +19 -13
msprobe/docs/11.accuracy_compare_MindSpore.md +104 -13
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
msprobe/docs/17.grad_probe.md +5 -6
msprobe/docs/19.monitor.md +468 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +386 -0
msprobe/docs/22.visualization_MindSpore.md +384 -0
msprobe/docs/23.tool_function_introduction.md +28 -0
msprobe/docs/FAQ.md +3 -0
msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/mindspore/__init__.py +15 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +113 -145
msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
msprobe/mindspore/api_accuracy_checker/main.py +27 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +33 -12
msprobe/mindspore/common/const.py +33 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +43 -4
msprobe/mindspore/compare/distributed_compare.py +22 -22
msprobe/mindspore/compare/ms_compare.py +271 -248
msprobe/mindspore/compare/ms_graph_compare.py +81 -47
msprobe/mindspore/debugger/debugger_config.py +4 -1
msprobe/mindspore/debugger/precision_debugger.py +7 -1
msprobe/mindspore/dump/dump_tool_factory.py +3 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +12 -2
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +13 -16
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +25 -0
msprobe/mindspore/dump/jit_dump.py +17 -5
msprobe/mindspore/dump/kernel_graph_dump.py +2 -4
msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +145 -39
msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
msprobe/mindspore/free_benchmark/common/utils.py +19 -4
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
msprobe/mindspore/grad_probe/global_context.py +28 -8
msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +24 -10
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/ms_config.py +22 -15
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +2 -4
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +36 -30
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +24 -7
msprobe/pytorch/__init__.py +3 -2
msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +6 -1
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +19 -14
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +13 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +77 -53
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +15 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +100 -6
msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
msprobe/pytorch/bench_functions/swiglu.py +10 -2
msprobe/pytorch/common/parse_json.py +6 -6
msprobe/pytorch/common/utils.py +56 -5
msprobe/pytorch/compare/distributed_compare.py +8 -9
msprobe/pytorch/compare/pt_compare.py +8 -6
msprobe/pytorch/debugger/debugger_config.py +19 -15
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +15 -0
msprobe/pytorch/free_benchmark/common/params.py +8 -1
msprobe/pytorch/free_benchmark/common/utils.py +26 -4
msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -3
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +10 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
msprobe/pytorch/hook_module/wrap_functional.py +14 -12
msprobe/pytorch/module_processer.py +2 -5
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +340 -0
msprobe/pytorch/monitor/distributed/__init__.py +0 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +870 -0
msprobe/pytorch/monitor/module_metric.py +193 -0
msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
msprobe/pytorch/monitor/optimizer_collect.py +295 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
msprobe/pytorch/monitor/utils.py +250 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +29 -38
msprobe/pytorch/online_dispatch/dispatch.py +50 -25
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +53 -32
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
msprobe/pytorch/online_dispatch/utils.py +49 -21
msprobe/pytorch/parse_tool/lib/compare.py +12 -18
msprobe/pytorch/parse_tool/lib/config.py +1 -1
msprobe/pytorch/parse_tool/lib/parse_tool.py +1 -2
msprobe/pytorch/parse_tool/lib/utils.py +16 -35
msprobe/pytorch/parse_tool/lib/visualization.py +2 -0
msprobe/pytorch/pt_config.py +31 -8
msprobe/pytorch/service.py +15 -5
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +165 -0
msprobe/visualization/builder/msprobe_adapter.py +205 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +130 -0
msprobe/visualization/compare/mode_adapter.py +211 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +124 -0
msprobe/visualization/graph/graph.py +200 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +214 -0
msprobe/visualization/utils.py +232 -0
mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
msprobe/docs/04.acl_config_examples.md +0 -78
msprobe/mindspore/compare/layer_mapping.py +0 -146
msprobe/mindspore/compare/modify_mapping.py +0 -107
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0

msprobe/core/data_dump/data_collector.py CHANGED Viewed

@@ -13,9 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import atexit
 import os
-from msprobe.core.data_dump.scope import build_scope, ListScope
+from msprobe.core.data_dump.scope import ScopeFactory
 from msprobe.core.data_dump.json_writer import DataWriter
 from msprobe.core.common.log import logger
 from msprobe.core.common.const import Const
@@ -27,7 +28,6 @@ def build_data_collector(config):
 class DataCollector:
-    multi_output_apis = ["_sort_", "npu_flash_attention"]
     tasks_need_tensor_data = [Const.OVERFLOW_CHECK, Const.TENSOR, Const.FREE_BENCHMARK]
     level_without_construct = [Const.LEVEL_L1, Const.LEVEL_L2]
@@ -37,13 +37,8 @@ class DataCollector:
         self.data_processor = DataProcessorFactory.create_processor(self.config, self.data_writer)
         self.module_processor = DataProcessorFactory.get_module_processor(self.config.framework)
         self.module_count = {}
-        if self.config.task == Const.FREE_BENCHMARK:
-            self.scope = build_scope(ListScope, self.config.scope, self.config.list)
-        else:
-            self.scope = build_scope(None, self.config.scope, self.config.list)
-    def __del__(self):
-        self.write_json()
+        self.scope = ScopeFactory(self.config).build_scope()
+        atexit.register(self.write_json)
     @property
     def dump_data_dir(self):
@@ -85,6 +80,10 @@ class DataCollector:
         self.data_writer.update_data(data_info)
     def pre_forward_data_collect(self, name, module, pid, module_input_output):
+        if self.config.level == Const.LEVEL_L2 and self.check_scope_and_pid(self.scope, name, pid):
+            self.data_processor.analyze_pre_forward(name, module, module_input_output)
+            return
         backward_name = name.replace(Const.FORWARD, Const.BACKWARD)
         if self.check_scope_and_pid(self.scope, backward_name, pid):
             self.data_processor.analyze_pre_forward(backward_name, module, module_input_output)
@@ -98,13 +97,14 @@ class DataCollector:
         self.update_construct(name)
         if not self.check_scope_and_pid(self.scope, name, pid):
             return
+        if self.config.level == Const.LEVEL_L2:
+            self.data_processor.analyze_forward(name, module, module_input_output)
+            return
         if not self.is_inplace(module):
             data_info = self.data_processor.analyze_forward(name, module, module_input_output)
         else:
             data_info = self.data_processor.analyze_forward_inplace(name, module_input_output)
-        if self.config.level == "L2":
-            return
         self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
@@ -114,6 +114,8 @@ class DataCollector:
             return
         data_info = self.data_processor.analyze_backward(name, module, module_input_output)
+        if self.config.level == Const.LEVEL_L2:
+            return
         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
     def backward_input_data_collect(self, name, module, pid, module_input_output):

msprobe/core/data_dump/data_processor/base.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
-# Licensed under the Apache License, Version 2.0  (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -15,10 +15,11 @@
 import inspect
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, is_dataclass
 from typing import Tuple, Dict, Optional, Any
 import numpy as np
 from msprobe.core.common.const import Const
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import convert_tuple, CompareException
@@ -101,6 +102,8 @@ class BaseDataProcessor:
         self.current_iter = 0
         self._return_forward_new_output = False
         self._forward_new_output = None
+        if hasattr(config, "data_mode"):
+            self.allowed_data_mode = self._get_allowed_data_mode(config.data_mode)
     @property
     def data_path(self):
@@ -182,6 +185,18 @@ class BaseDataProcessor:
     def _analyze_numpy(value, numpy_type):
         return {"type": numpy_type, "value": value}
+    @staticmethod
+    def _get_allowed_data_mode(data_mode):
+        if Const.ALL in data_mode:
+            allowed_data_mode = [Const.FORWARD, Const.BACKWARD, Const.INPUT, Const.OUTPUT]
+        else:
+            allowed_data_mode = list(set(data_mode))
+            if Const.FORWARD not in allowed_data_mode and Const.BACKWARD not in allowed_data_mode:
+                allowed_data_mode += [Const.FORWARD, Const.BACKWARD]
+            if Const.INPUT not in allowed_data_mode and Const.OUTPUT not in allowed_data_mode:
+                allowed_data_mode += [Const.INPUT, Const.OUTPUT]
+        return allowed_data_mode
     @classmethod
     def get_special_types(cls):
         return cls.special_type
@@ -194,25 +209,42 @@ class BaseDataProcessor:
         if isinstance(args, cls.get_special_types()):
             arg_transform = transform(args, cls._recursive_key_stack)
             return arg_transform
+        elif isinstance(args, tuple) and hasattr(args, '_fields'):
+            # namedtuple to dict
+            args_dict = {field: getattr(args, field) for field in args._fields}
+            return cls.apply_transform_dict(args_dict, transform, depth)
+        elif is_dataclass(args):
+            # dataclass to dict
+            args_dict = {field: getattr(args, field) for field in args.__dataclass_fields__}
+            return cls.apply_transform_dict(args_dict, transform, depth)
         elif isinstance(args, (list, tuple)):
-            result_list = []
-            for i, arg in enumerate(args):
-                cls._recursive_key_stack.append(str(i))
-                result_list.append(cls.recursive_apply_transform(arg, transform, depth=depth + 1))
-                cls._recursive_key_stack.pop()
+            result_list = cls.apply_transform_list(args, transform, depth)
             return type(args)(result_list)
         elif isinstance(args, dict):
-            result_dict = {}
-            for k, arg in args.items():
-                cls._recursive_key_stack.append(str(k))
-                result_dict[k] = cls.recursive_apply_transform(arg, transform, depth=depth + 1)
-                cls._recursive_key_stack.pop()
-            return result_dict
+            return cls.apply_transform_dict(args, transform, depth)
         elif args is not None:
             logger.warning(f"Data type {type(args)} is not supported.")
             return None
         else:
             return None
+    @classmethod
+    def apply_transform_dict(cls, args, transform, depth):
+        result_dict = {}
+        for k, arg in args.items():
+            cls._recursive_key_stack.append(str(k))
+            result_dict[k] = cls.recursive_apply_transform(arg, transform, depth=depth + 1)
+            cls._recursive_key_stack.pop()
+        return result_dict
+    @classmethod
+    def apply_transform_list(cls, args, transform, depth):
+        result_list = []
+        for i, arg in enumerate(args):
+            cls._recursive_key_stack.append(str(i))
+            result_list.append(cls.recursive_apply_transform(arg, transform, depth=depth + 1))
+            cls._recursive_key_stack.pop()
+        return result_list
     def if_return_forward_new_output(self):
         return self._return_forward_new_output
@@ -239,9 +271,7 @@ class BaseDataProcessor:
         Return:
             bool: True if the parameters are in data_mode or data_mode is all, False otherwise.
         """
-        return (Const.ALL in self.config.data_mode or
-                forward_backward in self.config.data_mode or
-                input_output in self.config.data_mode)
+        return forward_backward in self.allowed_data_mode and input_output in self.allowed_data_mode
     def analyze_pre_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
         pass

msprobe/core/data_dump/data_processor/mindspore_processor.py CHANGED Viewed

@@ -41,7 +41,7 @@ class MindsporeDataProcessor(BaseDataProcessor):
     @staticmethod
     def get_md5_for_tensor(x):
         x = convert_bf16_to_fp32(x)
-        tensor_bytes = x.asnumpy().tobytes()
+        tensor_bytes = x.contiguous().asnumpy().tobytes()
         crc32_hash = zlib.crc32(tensor_bytes)
         return f"{crc32_hash:08x}"
@@ -58,19 +58,19 @@ class MindsporeDataProcessor(BaseDataProcessor):
         if data.numel() == 0:
             return tensor_stat
         elif data.dtype == ms.bool_:
-            data_np = data.asnumpy()
+            data_np = data.contiguous().asnumpy()
             tensor_stat.max = np.max(data_np).item()
             tensor_stat.min = np.min(data_np).item()
         elif not data.shape:
             tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item()
         elif data.dtype == ms.complex64 or data.dtype == ms.complex128:
-            data_abs = np.abs(data.asnumpy())
+            data_abs = np.abs(data.contiguous().asnumpy())
             tensor_stat.max = np.max(data_abs).item()
             tensor_stat.min = np.min(data_abs).item()
             tensor_stat.mean = np.mean(data_abs).item()
             tensor_stat.norm = np.linalg.norm(data_abs).item()
         else:
-            if not ops.is_floating_point(data):
+            if not ops.is_floating_point(data) or data.dtype == ms.float64:
                 data = data.to(ms.float32)
             api_register.norm_inner_op_set_ori_func()
             get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max)

msprobe/core/data_dump/data_processor/pytorch_processor.py CHANGED Viewed

@@ -13,19 +13,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import hashlib
 import zlib
 from dataclasses import asdict
 from typing import List
 import numpy as np
 import torch
+from torch import distributed as dist
 from msprobe.core.common.const import Const
 from msprobe.core.common.file_utils import path_len_exceeds_limit
 from msprobe.core.common.log import logger
+from msprobe.core.common.utils import convert_tuple
 from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
     ModuleForwardInputsOutputs, TensorStatInfo
 from msprobe.pytorch.common.utils import save_pt, load_pt
 from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
+from msprobe.core.common.utils import recursion_depth_decorator
 is_gpu = False
 try:
@@ -35,7 +40,13 @@ except ImportError:
 class PytorchDataProcessor(BaseDataProcessor):
-    pytorch_special_type = (torch.device, torch.dtype, torch.Size, torch.Tensor)
+    pytorch_special_type = (torch.device, torch.dtype, torch.Size, torch.Tensor, torch.memory_format, dist.ProcessGroup)
+    memory_format = {
+        torch.contiguous_format: "contiguous_format",
+        torch.channels_last: "channels_last",
+        torch.channels_last_3d: "channels_last_3d",
+        torch.preserve_format: "preserve_format"
+    }
     def __init__(self, config, data_writer):
         super().__init__(config, data_writer)
@@ -79,8 +90,8 @@ class PytorchDataProcessor(BaseDataProcessor):
         if data_clone.numel() == 0:
             return tensor_stat
         elif data_clone.dtype == torch.bool:
-            tensor_stat.max = True in data_clone
-            tensor_stat.min = False not in data_clone
+            tensor_stat.max = torch._C._VariableFunctionsClass.any(data_clone).item()
+            tensor_stat.min = torch._C._VariableFunctionsClass.all(data_clone).item()
         elif not data_clone.shape:
             tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data_clone.item()
         elif torch.is_complex(data_clone):
@@ -104,20 +115,46 @@ class PytorchDataProcessor(BaseDataProcessor):
         data_nan = torch._C._VariableFunctionsClass.isnan(data_clone)
         if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel():
             return float('nan')
         finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone)
         if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0:
-            finite_values = data_clone[finite_mask]
+            finite_values = getattr(torch._C._TensorBase, "__getitem__")(data_clone, finite_mask)
             return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \
                 torch._C._VariableFunctionsClass.min(finite_values).item()
         else:
-            data_no_nan = data_clone[~data_nan]
+            data_no_nan = getattr(torch._C._TensorBase, "__getitem__")(data_clone, ~data_nan)
             return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \
                 torch._C._VariableFunctionsClass.min(data_no_nan).item()
+    @staticmethod
+    def process_group_hash(arg):
+        group_ranks = dist.get_process_group_ranks(arg)
+        group_ranks_hash = hashlib.md5(str(group_ranks).encode('utf-8')).hexdigest()
+        return group_ranks_hash
     @staticmethod
     def _analyze_torch_size(arg):
         return {"type": "torch.Size", "value": list(arg)}
+    @staticmethod
+    def _analyze_memory_format(arg):
+        # 获取内存格式
+        format_type = PytorchDataProcessor.memory_format.get(arg)
+        return {"type": "torch.memory_format", "format": format_type}
+    @staticmethod
+    def _analyze_process_group(arg):
+        group_info = {"type": "torch.ProcessGroup"}
+        try:
+            group_ranks = dist.get_process_group_ranks(arg)
+            group_info.update({"group_ranks": group_ranks})
+            group_id = PytorchDataProcessor.process_group_hash(arg)
+            group_info.update({"group_id": group_id})
+        except Exception as e:
+            logger.warning(f"Failed to get process group(id: {group_id}) ranks info with error info: {e}.")
+        return group_info
     @classmethod
     def get_special_types(cls):
         return super().get_special_types() + cls.pytorch_special_type
@@ -127,6 +164,10 @@ class PytorchDataProcessor(BaseDataProcessor):
             return self.torch_object_key[suffix_stack[-1]](element)
         if isinstance(element, torch.Size):
             return self._analyze_torch_size(element)
+        if isinstance(element, torch.memory_format):
+            return self._analyze_memory_format(element)
+        if isinstance(element, dist.ProcessGroup):
+            return self._analyze_process_group(element)
         converted_numpy, numpy_type = self._convert_numpy_to_builtin(element)
         if converted_numpy is not element:
             return self._analyze_numpy(converted_numpy, numpy_type)
@@ -320,64 +361,120 @@ class FreeBenchmarkDataProcessor(PytorchDataProcessor):
 class KernelDumpDataProcessor(PytorchDataProcessor):
-    forward_init_status = False
-    multi_output_apis = ["_sort_", "npu_flash_attention"]
     def __init__(self, config, data_writer):
         super().__init__(config, data_writer)
+        self.enable_kernel_dump = True
+        self.is_found_output_tensor = False
+        self.is_found_grad_input_tensor = False
+        self.forward_args = None
+        self.forward_kwargs = None
+        self.forward_output_tensor = None
+        self.grad_input_tensor = None
+    @staticmethod
+    def start_kernel_dump(config_path):
+        torch_npu.npu.synchronize()
+        torch_npu.npu.init_dump()
+        torch_npu.npu.set_dump(config_path)
+        torch_npu.npu.synchronize()
+    @staticmethod
+    def stop_kernel_dump():
+        torch_npu.npu.synchronize()
+        torch_npu.npu.finalize_dump()
+        torch_npu.npu.synchronize()
+    @staticmethod
+    def _print_unsupported_log(api_name):
+        logger.warning(f"The kernel dump does not support the {api_name} API.")
+    def analyze_pre_forward(self, name, module, module_input_output):
+        if not self.enable_kernel_dump:
+            return
+        if is_gpu:
+            logger.warning("The current environment is not a complete NPU environment, and kernel dump cannot be used.")
+            self.enable_kernel_dump = False
+            return
+        if self.config.is_backward_kernel_dump:
+            self.forward_args = self.clone_and_detach_tensor(module_input_output.args)
+            self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs)
+            try:
+                output = module.forward(*self.forward_args, **self.forward_kwargs)
+            except Exception:
+                self._print_unsupported_log(name)
+                self.enable_kernel_dump = False
+                return
+            self.analyze_element(convert_tuple(output))
+            if not self.is_found_output_tensor:
+                self._print_unsupported_log(name)
+                self.enable_kernel_dump = False
+            return
+        self.start_kernel_dump(self.config.kernel_config_path)
     def analyze_forward(self, name, module, module_input_output):
-        if self.config.is_forward_acl_dump:
-            self.forward_acl_dump(name, module, module_input_output)
+        if not self.enable_kernel_dump:
+            return
+        if self.config.is_backward_kernel_dump:
+            return
+        self.enable_kernel_dump = False
+        self.stop_kernel_dump()
+        logger.info(f"The kernel data of {name} is dumped successfully.")
+    def analyze_backward(self, name, module, module_input_output):
+        if not self.enable_kernel_dump:
+            return
+        self.enable_kernel_dump = False
+        self.analyze_element(module_input_output.grad_input)
+        if not self.is_found_grad_input_tensor:
+            self._print_unsupported_log(name)
+            return
+        self.start_kernel_dump(self.config.kernel_config_path)
+        try:
+            self.forward_output_tensor.backward(self.grad_input_tensor, retain_graph=True)
+        except Exception:
+            self._print_unsupported_log(name)
+            self.stop_kernel_dump()
+            return
+        self.stop_kernel_dump()
+        logger.info(f"The kernel data of {name} is dumped successfully.")
+    @recursion_depth_decorator("KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor")
+    def clone_and_detach_tensor(self, input_params):
+        if isinstance(input_params, torch.Tensor):
+            if input_params.requires_grad:
+                return input_params.clone().detach().requires_grad_()
+            return input_params.clone()
+        elif isinstance(input_params, tuple):
+            return tuple(self.clone_and_detach_tensor(x) for x in input_params)
+        elif isinstance(input_params, list):
+            return list(self.clone_and_detach_tensor(x) for x in input_params)
+        elif isinstance(input_params, dict):
+            return {k: self.clone_and_detach_tensor(v) for k, v in input_params.items()}
         else:
-            self.dump_mode_backward_acl_dump(name, module, module_input_output)
-    def forward_acl_dump(self, name, module, module_input_output):
-        if not KernelDumpDataProcessor.forward_init_status:
-            KernelDumpDataProcessor.forward_init_status = True
-            torch_npu.npu.synchronize()
-            torch_npu.npu.init_dump()
-            torch_npu.npu.set_dump(self.config.acl_config)
-            torch_npu.npu.synchronize()
-            if self.op_need_trigger(name):
-                module.forward(*module_input_output.args, **module_input_output.kwargs).cpu()
-            else:
-                module.forward(*module_input_output.args, **module_input_output.kwargs)
-            torch_npu.npu.synchronize()
-            torch_npu.npu.finalize_dump()
-            torch_npu.npu.synchronize()
-        KernelDumpDataProcessor.forward_init_status = False
-        logger.info("Dump %s op file." % name)
-    def acl_backward_dump_status(self, output, grad, module_name):
-        if isinstance(output, torch.Tensor):
-            output.backward(grad, retain_graph=True)
-            return True
+            return input_params
-        for api_name in KernelDumpDataProcessor.multi_output_apis:
-            if api_name in module_name:
-                output[0].backward(grad, retain_graph=True)
-                return True
-        return False
+    def analyze_single_element(self, element, suffix_stack):
+        if isinstance(element, torch.Tensor):
+            if not self.is_found_output_tensor:
+                if element.requires_grad:
+                    self.forward_output_tensor = element
+                    self.is_found_output_tensor = True
+                return {}
+            if not self.is_found_grad_input_tensor:
+                self.grad_input_tensor = element.clone()
+                self.is_found_grad_input_tensor = True
+        return {}
-    def dump_mode_backward_acl_dump(self, name, module, module_input_output):
-        grad_path = self.config.backward_input.get(name)
-        if not KernelDumpDataProcessor.forward_init_status:
-            KernelDumpDataProcessor.forward_init_status = True
-            output = module.forward(*module_input_output.args, **module_input_output.kwargs)
-            pt = load_pt(grad_path)
-            grad = pt.to("npu").requires_grad_()
-            torch_npu.npu.init_dump()
-            torch_npu.npu.set_dump(self.config.acl_config)
-            torch_npu.npu.synchronize()
-            if not self.acl_backward_dump_status(output, grad, name):
-                logger.warning("The output of {} is not of tensor type and cannot be automatically derived. "
-                               "you can manually construct a single API backward case for ACL dump.".format(
-                    name))
-            torch_npu.npu.synchronize()
-            torch_npu.npu.finalize_dump()
-        KernelDumpDataProcessor.forward_init_status = False
-        logger.info("Dump %s op file." % name)
-    def op_need_trigger(self, module_name):
-        return 'Tensor.__getitem__.' in module_name
+    def reset_status(self):
+        self.enable_kernel_dump = True
+        self.is_found_output_tensor = False
+        self.is_found_grad_input_tensor = False
+        self.forward_args = None
+        self.forward_kwargs = None
+        self.forward_output_tensor = None
+        self.grad_input_tensor = None

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl