PyPI - mindstudio-probe - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

mindstudio-probe: 1.0.4 (py3-none-any.whl) → 1.1.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (278)

{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
msprobe/README.md +84 -18
msprobe/__init__.py +16 -1
msprobe/config.json +1 -5
msprobe/core/advisor/advisor.py +16 -11
msprobe/core/advisor/advisor_const.py +6 -7
msprobe/core/advisor/advisor_result.py +12 -12
msprobe/core/common/const.py +164 -3
msprobe/core/common/exceptions.py +26 -4
msprobe/core/common/file_utils.py +196 -27
msprobe/core/common/inplace_op_checker.py +53 -0
msprobe/core/common/inplace_ops.yaml +251 -0
msprobe/core/common/log.py +46 -18
msprobe/core/common/utils.py +308 -209
msprobe/core/common_config.py +60 -38
msprobe/core/compare/acc_compare.py +332 -94
msprobe/core/compare/check.py +104 -22
msprobe/core/compare/compare_cli.py +42 -5
msprobe/core/compare/highlight.py +162 -57
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
msprobe/core/compare/multiprocessing_compute.py +33 -8
msprobe/core/compare/npy_compare.py +73 -29
msprobe/core/compare/utils.py +306 -247
msprobe/core/data_dump/data_collector.py +44 -43
msprobe/core/data_dump/data_processor/base.py +88 -35
msprobe/core/data_dump/data_processor/factory.py +20 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +14 -8
msprobe/core/data_dump/data_processor/pytorch_processor.py +180 -66
msprobe/core/data_dump/json_writer.py +63 -42
msprobe/core/data_dump/scope.py +143 -48
msprobe/core/grad_probe/constant.py +31 -13
msprobe/core/grad_probe/grad_compare.py +20 -4
msprobe/core/grad_probe/utils.py +44 -3
msprobe/core/overflow_check/abnormal_scene.py +185 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +29 -9
msprobe/docs/02.config_introduction.md +83 -84
msprobe/docs/03.config_examples.md +3 -20
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +143 -13
msprobe/docs/06.data_dump_MindSpore.md +197 -88
msprobe/docs/07.accuracy_checker_PyTorch.md +69 -46
msprobe/docs/08.accuracy_checker_online_PyTorch.md +52 -17
msprobe/docs/09.accuracy_checker_MindSpore.md +51 -15
msprobe/docs/10.accuracy_compare_PyTorch.md +187 -99
msprobe/docs/11.accuracy_compare_MindSpore.md +253 -31
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +60 -55
msprobe/docs/16.free_benchmarking_MindSpore.md +159 -0
msprobe/docs/17.grad_probe.md +19 -22
msprobe/docs/18.online_dispatch.md +89 -0
msprobe/docs/19.monitor.md +468 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +386 -0
msprobe/docs/22.visualization_MindSpore.md +384 -0
msprobe/docs/23.tool_function_introduction.md +28 -0
msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +25 -10
msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/mindspore/__init__.py +16 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +130 -138
msprobe/mindspore/api_accuracy_checker/api_info.py +27 -5
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
msprobe/mindspore/api_accuracy_checker/main.py +27 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +58 -13
msprobe/mindspore/common/const.py +35 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +60 -5
msprobe/mindspore/compare/distributed_compare.py +15 -28
msprobe/mindspore/compare/ms_compare.py +319 -158
msprobe/mindspore/compare/ms_graph_compare.py +99 -49
msprobe/mindspore/debugger/debugger_config.py +20 -14
msprobe/mindspore/debugger/precision_debugger.py +43 -13
msprobe/mindspore/dump/dump_tool_factory.py +18 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +23 -3
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +203 -0
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +107 -10
msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
msprobe/mindspore/dump/jit_dump.py +56 -20
msprobe/mindspore/dump/kernel_graph_dump.py +19 -5
msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -6
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +162 -41
msprobe/mindspore/free_benchmark/common/config.py +15 -0
msprobe/mindspore/free_benchmark/common/handler_params.py +15 -1
msprobe/mindspore/free_benchmark/common/utils.py +37 -8
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +20 -5
msprobe/mindspore/free_benchmark/handler/check_handler.py +21 -7
msprobe/mindspore/free_benchmark/handler/fix_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -6
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +23 -8
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +29 -5
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +25 -10
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +45 -19
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +29 -8
msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +22 -7
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +17 -2
msprobe/mindspore/grad_probe/global_context.py +44 -14
msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +24 -10
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/ms_config.py +22 -15
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -6
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +75 -150
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +24 -7
msprobe/pytorch/__init__.py +23 -3
msprobe/pytorch/api_accuracy_checker/common/config.py +81 -2
msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +50 -25
msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +73 -33
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +44 -18
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +32 -11
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +122 -172
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +158 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +30 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +68 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +115 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +26 -9
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
msprobe/pytorch/bench_functions/__init__.py +18 -3
msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
msprobe/pytorch/bench_functions/confusion_transpose.py +20 -1
msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
msprobe/pytorch/bench_functions/linear.py +15 -0
msprobe/pytorch/bench_functions/matmul_backward.py +33 -6
msprobe/pytorch/bench_functions/npu_fusion_attention.py +280 -157
msprobe/pytorch/bench_functions/rms_norm.py +15 -0
msprobe/pytorch/bench_functions/rotary_mul.py +32 -9
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
msprobe/pytorch/bench_functions/swiglu.py +29 -6
msprobe/pytorch/common/__init__.py +15 -0
msprobe/pytorch/common/log.py +18 -6
msprobe/pytorch/common/parse_json.py +31 -16
msprobe/pytorch/common/utils.py +96 -40
msprobe/pytorch/compare/distributed_compare.py +13 -14
msprobe/pytorch/compare/match.py +15 -0
msprobe/pytorch/compare/pt_compare.py +44 -10
msprobe/pytorch/debugger/debugger_config.py +69 -52
msprobe/pytorch/debugger/precision_debugger.py +72 -24
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/free_benchmark/__init__.py +20 -5
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +43 -0
msprobe/pytorch/free_benchmark/common/params.py +23 -1
msprobe/pytorch/free_benchmark/common/utils.py +43 -5
msprobe/pytorch/free_benchmark/compare/grad_saver.py +47 -9
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +17 -0
msprobe/pytorch/free_benchmark/main.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +18 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +21 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +28 -2
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +19 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +65 -16
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +21 -5
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
msprobe/pytorch/function_factory.py +17 -2
msprobe/pytorch/functional/module_dump.py +84 -0
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/__init__.py +16 -1
msprobe/pytorch/hook_module/api_registry.py +13 -8
msprobe/pytorch/hook_module/hook_module.py +17 -19
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
msprobe/pytorch/hook_module/utils.py +4 -6
msprobe/pytorch/hook_module/wrap_aten.py +12 -11
msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
msprobe/pytorch/hook_module/wrap_functional.py +21 -20
msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
msprobe/pytorch/hook_module/wrap_torch.py +4 -6
msprobe/pytorch/hook_module/wrap_vf.py +4 -6
msprobe/pytorch/module_processer.py +18 -6
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +340 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +870 -0
msprobe/pytorch/monitor/module_metric.py +193 -0
msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
msprobe/pytorch/monitor/optimizer_collect.py +295 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
msprobe/pytorch/monitor/utils.py +250 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +38 -48
msprobe/pytorch/online_dispatch/dispatch.py +50 -25
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +60 -39
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +9 -1
msprobe/pytorch/online_dispatch/utils.py +48 -23
msprobe/pytorch/parse.py +15 -0
msprobe/pytorch/parse_tool/cli.py +5 -6
msprobe/pytorch/parse_tool/lib/compare.py +19 -26
msprobe/pytorch/parse_tool/lib/config.py +1 -1
msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -2
msprobe/pytorch/parse_tool/lib/utils.py +40 -55
msprobe/pytorch/parse_tool/lib/visualization.py +3 -1
msprobe/pytorch/pt_config.py +192 -40
msprobe/pytorch/service.py +110 -35
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +165 -0
msprobe/visualization/builder/msprobe_adapter.py +205 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +130 -0
msprobe/visualization/compare/mode_adapter.py +211 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +124 -0
msprobe/visualization/graph/graph.py +200 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +214 -0
msprobe/visualization/utils.py +232 -0
mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
msprobe/docs/04.acl_config_examples.md +0 -76
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -43
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -107
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
msprobe/pytorch/functional/dump_module.py +0 -39
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
/msprobe/pytorch/{functional/data_processor.py → monitor/distributed/__init__.py} +0 -0

msprobe/core/data_dump/data_processor/pytorch_processor.py CHANGED

@@ -1,26 +1,52 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import hashlib
 import zlib
 from dataclasses import asdict
 from typing import List
 import numpy as np
 import torch
-from msprobe.core.common.file_utils import path_len_exceeds_limit, change_mode
+from torch import distributed as dist
+from msprobe.core.common.const import Const
+from msprobe.core.common.file_utils import path_len_exceeds_limit
 from msprobe.core.common.log import logger
-from msprobe.core.common.const import Const, OverflowConst, FileCheckConst
+from msprobe.core.common.utils import convert_tuple
 from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
     ModuleForwardInputsOutputs, TensorStatInfo
-from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
 from msprobe.pytorch.common.utils import save_pt, load_pt
+from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
+from msprobe.core.common.utils import recursion_depth_decorator
+is_gpu = False
 try:
     import torch_npu
-    is_gpu = False
 except ImportError:
     is_gpu = True
 class PytorchDataProcessor(BaseDataProcessor):
-    pytorch_special_type = (torch.device, torch.dtype, torch.Size, torch.Tensor)
+    pytorch_special_type = (torch.device, torch.dtype, torch.Size, torch.Tensor, torch.memory_format, dist.ProcessGroup)
+    memory_format = {
+        torch.contiguous_format: "contiguous_format",
+        torch.channels_last: "channels_last",
+        torch.channels_last_3d: "channels_last_3d",
+        torch.preserve_format: "preserve_format"
+    }
     def __init__(self, config, data_writer):
         super().__init__(config, data_writer)
@@ -64,8 +90,8 @@ class PytorchDataProcessor(BaseDataProcessor):
         if data_clone.numel() == 0:
             return tensor_stat
         elif data_clone.dtype == torch.bool:
-            tensor_stat.max = True in data_clone
-            tensor_stat.min = False not in data_clone
+            tensor_stat.max = torch._C._VariableFunctionsClass.any(data_clone).item()
+            tensor_stat.min = torch._C._VariableFunctionsClass.all(data_clone).item()
         elif not data_clone.shape:
             tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data_clone.item()
         elif torch.is_complex(data_clone):
@@ -89,20 +115,46 @@ class PytorchDataProcessor(BaseDataProcessor):
         data_nan = torch._C._VariableFunctionsClass.isnan(data_clone)
         if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel():
             return float('nan')
         finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone)
         if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0:
-            finite_values = data_clone[finite_mask]
+            finite_values = getattr(torch._C._TensorBase, "__getitem__")(data_clone, finite_mask)
             return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \
                 torch._C._VariableFunctionsClass.min(finite_values).item()
         else:
-            data_no_nan = data_clone[~data_nan]
+            data_no_nan = getattr(torch._C._TensorBase, "__getitem__")(data_clone, ~data_nan)
             return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \
                 torch._C._VariableFunctionsClass.min(data_no_nan).item()
+    @staticmethod
+    def process_group_hash(arg):
+        group_ranks = dist.get_process_group_ranks(arg)
+        group_ranks_hash = hashlib.md5(str(group_ranks).encode('utf-8')).hexdigest()
+        return group_ranks_hash
     @staticmethod
     def _analyze_torch_size(arg):
         return {"type": "torch.Size", "value": list(arg)}
+    @staticmethod
+    def _analyze_memory_format(arg):
+        # 获取内存格式
+        format_type = PytorchDataProcessor.memory_format.get(arg)
+        return {"type": "torch.memory_format", "format": format_type}
+    @staticmethod
+    def _analyze_process_group(arg):
+        group_info = {"type": "torch.ProcessGroup"}
+        try:
+            group_ranks = dist.get_process_group_ranks(arg)
+            group_info.update({"group_ranks": group_ranks})
+            group_id = PytorchDataProcessor.process_group_hash(arg)
+            group_info.update({"group_id": group_id})
+        except Exception as e:
+            logger.warning(f"Failed to get process group(id: {group_id}) ranks info with error info: {e}.")
+        return group_info
     @classmethod
     def get_special_types(cls):
         return super().get_special_types() + cls.pytorch_special_type
@@ -112,6 +164,10 @@ class PytorchDataProcessor(BaseDataProcessor):
             return self.torch_object_key[suffix_stack[-1]](element)
         if isinstance(element, torch.Size):
             return self._analyze_torch_size(element)
+        if isinstance(element, torch.memory_format):
+            return self._analyze_memory_format(element)
+        if isinstance(element, dist.ProcessGroup):
+            return self._analyze_process_group(element)
         converted_numpy, numpy_type = self._convert_numpy_to_builtin(element)
         if converted_numpy is not element:
             return self._analyze_numpy(converted_numpy, numpy_type)
@@ -153,7 +209,7 @@ class StatisticsDataProcessor(PytorchDataProcessor):
 class TensorDataProcessor(PytorchDataProcessor):
     def _analyze_tensor(self, tensor, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
-        saved_tensor = tensor.contiguous().detach()
+        saved_tensor = tensor.clone().contiguous().detach()
         save_pt(saved_tensor, file_path)
         single_arg = super()._analyze_tensor(tensor, suffix)
         single_arg.update({"data_name": dump_data_name})
@@ -178,7 +234,6 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
         if self.overflow_nums == -1:
             return False
         if self.real_overflow_nums >= self.overflow_nums:
-            logger.info(f"[msprobe] 超过预设溢出次数 当前溢出次数: {self.real_overflow_nums}")
             return True
         return False
@@ -219,6 +274,9 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
             for file_path, tensor in self.cached_tensors_and_file_paths.items():
                 save_pt(tensor, file_path)
             self.real_overflow_nums += 1
+            if self.overflow_nums != -1 and self.real_overflow_nums >= self.overflow_nums:
+                logger.info(f"[{Const.TOOL_NAME}] Reached the preset overflow times, "
+                            f"current overflow times: {self.real_overflow_nums}.")
         self.cached_tensors_and_file_paths = {}
     def _is_support_inf_nan(self):
@@ -243,7 +301,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
         if tensor_json['Max'] is None or tensor_json['Min'] is None:
             return
         self.has_overflow = np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']) or \
-            np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min'])
+                            np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min'])
     def _analyze_tensor(self, tensor, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
@@ -303,64 +361,120 @@ class FreeBenchmarkDataProcessor(PytorchDataProcessor):
 class KernelDumpDataProcessor(PytorchDataProcessor):
-    forward_init_status = False
-    multi_output_apis = ["_sort_", "npu_flash_attention"]
     def __init__(self, config, data_writer):
         super().__init__(config, data_writer)
+        self.enable_kernel_dump = True
+        self.is_found_output_tensor = False
+        self.is_found_grad_input_tensor = False
+        self.forward_args = None
+        self.forward_kwargs = None
+        self.forward_output_tensor = None
+        self.grad_input_tensor = None
+    @staticmethod
+    def start_kernel_dump(config_path):
+        torch_npu.npu.synchronize()
+        torch_npu.npu.init_dump()
+        torch_npu.npu.set_dump(config_path)
+        torch_npu.npu.synchronize()
+    @staticmethod
+    def stop_kernel_dump():
+        torch_npu.npu.synchronize()
+        torch_npu.npu.finalize_dump()
+        torch_npu.npu.synchronize()
+    @staticmethod
+    def _print_unsupported_log(api_name):
+        logger.warning(f"The kernel dump does not support the {api_name} API.")
+    def analyze_pre_forward(self, name, module, module_input_output):
+        if not self.enable_kernel_dump:
+            return
+        if is_gpu:
+            logger.warning("The current environment is not a complete NPU environment, and kernel dump cannot be used.")
+            self.enable_kernel_dump = False
+            return
+        if self.config.is_backward_kernel_dump:
+            self.forward_args = self.clone_and_detach_tensor(module_input_output.args)
+            self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs)
+            try:
+                output = module.forward(*self.forward_args, **self.forward_kwargs)
+            except Exception:
+                self._print_unsupported_log(name)
+                self.enable_kernel_dump = False
+                return
+            self.analyze_element(convert_tuple(output))
+            if not self.is_found_output_tensor:
+                self._print_unsupported_log(name)
+                self.enable_kernel_dump = False
+            return
+        self.start_kernel_dump(self.config.kernel_config_path)
     def analyze_forward(self, name, module, module_input_output):
-        if self.config.is_forward_acl_dump:
-            self.forward_acl_dump(name, module, module_input_output)
+        if not self.enable_kernel_dump:
+            return
+        if self.config.is_backward_kernel_dump:
+            return
+        self.enable_kernel_dump = False
+        self.stop_kernel_dump()
+        logger.info(f"The kernel data of {name} is dumped successfully.")
+    def analyze_backward(self, name, module, module_input_output):
+        if not self.enable_kernel_dump:
+            return
+        self.enable_kernel_dump = False
+        self.analyze_element(module_input_output.grad_input)
+        if not self.is_found_grad_input_tensor:
+            self._print_unsupported_log(name)
+            return
+        self.start_kernel_dump(self.config.kernel_config_path)
+        try:
+            self.forward_output_tensor.backward(self.grad_input_tensor, retain_graph=True)
+        except Exception:
+            self._print_unsupported_log(name)
+            self.stop_kernel_dump()
+            return
+        self.stop_kernel_dump()
+        logger.info(f"The kernel data of {name} is dumped successfully.")
+    @recursion_depth_decorator("KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor")
+    def clone_and_detach_tensor(self, input_params):
+        if isinstance(input_params, torch.Tensor):
+            if input_params.requires_grad:
+                return input_params.clone().detach().requires_grad_()
+            return input_params.clone()
+        elif isinstance(input_params, tuple):
+            return tuple(self.clone_and_detach_tensor(x) for x in input_params)
+        elif isinstance(input_params, list):
+            return list(self.clone_and_detach_tensor(x) for x in input_params)
+        elif isinstance(input_params, dict):
+            return {k: self.clone_and_detach_tensor(v) for k, v in input_params.items()}
         else:
-            self.dump_mode_backward_acl_dump(name, module, module_input_output)
-    def forward_acl_dump(self, name, module, module_input_output):
-        if not KernelDumpDataProcessor.forward_init_status:
-            KernelDumpDataProcessor.forward_init_status = True
-            torch_npu.npu.synchronize()
-            torch_npu.npu.init_dump()
-            torch_npu.npu.set_dump(self.config.acl_config)
-            torch_npu.npu.synchronize()
-            if self.op_need_trigger(name):
-                module.forward(*module_input_output.args, **module_input_output.kwargs).cpu()
-            else:
-                module.forward(*module_input_output.args, **module_input_output.kwargs)
-            torch_npu.npu.synchronize()
-            torch_npu.npu.finalize_dump()
-            torch_npu.npu.synchronize()
-        KernelDumpDataProcessor.forward_init_status = False
-        logger.info("Dump %s op file." % name)
-    def acl_backward_dump_status(self, output, grad, module_name):
-        if isinstance(output, torch.Tensor):
-            output.backward(grad, retain_graph=True)
-            return True
+            return input_params
-        for api_name in KernelDumpDataProcessor.multi_output_apis:
-            if api_name in module_name:
-                output[0].backward(grad, retain_graph=True)
-                return True
-        return False
+    def analyze_single_element(self, element, suffix_stack):
+        if isinstance(element, torch.Tensor):
+            if not self.is_found_output_tensor:
+                if element.requires_grad:
+                    self.forward_output_tensor = element
+                    self.is_found_output_tensor = True
+                return {}
+            if not self.is_found_grad_input_tensor:
+                self.grad_input_tensor = element.clone()
+                self.is_found_grad_input_tensor = True
+        return {}
-    def dump_mode_backward_acl_dump(self, name, module, module_input_output):
-        grad_path = self.config.backward_input.get(name)
-        if not KernelDumpDataProcessor.forward_init_status:
-            KernelDumpDataProcessor.forward_init_status = True
-            output = module.forward(*module_input_output.args, **module_input_output.kwargs)
-            pt = load_pt(grad_path)
-            grad = pt.to("npu").requires_grad_()
-            torch_npu.npu.init_dump()
-            torch_npu.npu.set_dump(self.config.acl_config)
-            torch_npu.npu.synchronize()
-            if not self.acl_backward_dump_status(output, grad, name):
-                logger.warning("The output of {} is not of tensor type and cannot be automatically derived. "
-                               "you can manually construct a single API backward case for ACL dump.".format(
-                    name))
-            torch_npu.npu.synchronize()
-            torch_npu.npu.finalize_dump()
-        KernelDumpDataProcessor.forward_init_status = False
-        logger.info("Dump %s op file." % name)
-    def op_need_trigger(self, module_name):
-        return 'Tensor.__getitem__.' in module_name
+    def reset_status(self):
+        self.enable_kernel_dump = True
+        self.is_found_output_tensor = False
+        self.is_found_grad_input_tensor = False
+        self.forward_args = None
+        self.forward_kwargs = None
+        self.forward_output_tensor = None
+        self.grad_input_tensor = None

msprobe/core/data_dump/json_writer.py CHANGED

@@ -1,24 +1,36 @@
-import os
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import csv
+import os
-from msprobe.core.common.file_utils import change_mode, FileOpen
-from msprobe.core.common.log import logger
 from msprobe.core.common.const import Const, FileCheckConst
-from msprobe.core.common.file_utils import remove_path, load_json, save_json
+from msprobe.core.common.file_utils import change_mode, FileOpen, save_json
+from msprobe.core.common.log import logger
 class DataWriter:
-    def __init__(self, init_json=None) -> None:
-        self.dump_count = 0
-        self.init_json = init_json
-        self.dump_file_path = None  # os.path.join(dump_dir, DataWriter.dump_json_name)
-        self.stack_file_path = None  # os.path.join(dump_dir, DataWriter.stack_json_name)
-        self.construct_file_path = None  # os.path.join(dump_dir, DataWriter.construct_json_name)
+    def __init__(self) -> None:
+        self.dump_file_path = None
+        self.stack_file_path = None
+        self.construct_file_path = None
         self.free_benchmark_file_path = None
         self.dump_tensor_data_dir = None
-        self.buffer_size = 1000
-        self.cache_data = {Const.DATA: {}}
+        self.flush_size = 1000
+        self.cache_data = {}
         self.cache_stack = {}
         self.cache_construct = {}
@@ -37,18 +49,22 @@ class DataWriter:
         if is_new_file:
             change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY)
-    def initialize_json_file(self, **kwargs):
-        kwargs.update({"dump_data_dir": self.dump_tensor_data_dir, Const.DATA: {}})
-        save_json(self.dump_file_path, kwargs)
-        empty_dict = {}
-        remove_path(self.stack_file_path)
-        save_json(self.stack_file_path, empty_dict)
-        remove_path(self.construct_file_path)
-        save_json(self.construct_file_path, empty_dict)
+    def reset_cache(self):
+        self.cache_data = {}
+        self.cache_stack = {}
+        self.cache_construct = {}
-    def update_dump_paths(self, dump_file_path, stack_file_path, construct_file_path, dump_data_dir,
+    def initialize_json_file(self, **kwargs):
+        if not self.cache_data:
+            kwargs.update({"dump_data_dir": self.dump_tensor_data_dir, Const.DATA: {}})
+            self.cache_data = kwargs
+            save_json(self.dump_file_path, self.cache_data, indent=1)
+        if not self.cache_stack:
+            save_json(self.stack_file_path, self.cache_stack, indent=1)
+        if not self.cache_construct:
+            save_json(self.construct_file_path, self.cache_construct, indent=1)
+    def update_dump_paths(self, dump_file_path, stack_file_path, construct_file_path, dump_data_dir,
                           free_benchmark_file_path):
         self.dump_file_path = dump_file_path
         self.stack_file_path = stack_file_path
@@ -56,16 +72,25 @@ class DataWriter:
         self.dump_tensor_data_dir = dump_data_dir
         self.free_benchmark_file_path = free_benchmark_file_path
+    def flush_data_periodically(self):
+        dump_data = self.cache_data.get(Const.DATA)
+        if dump_data and isinstance(dump_data, dict) and len(dump_data) % self.flush_size == 0:
+            self.write_json()
     def update_data(self, new_data):
-        key = next(iter(new_data.keys()))  # assert len(new_data.keys()) == 1
-        if key in self.cache_data[Const.DATA]:
-            self.cache_data[Const.DATA][key].update(new_data[key])
-        else:
-            self.cache_data[Const.DATA].update(new_data)
+        if not isinstance(new_data, dict) or len(new_data.keys()) != 1:
+            logger.warning(f"The data info({new_data}) should be a dict with only one outer key.")
+            return
+        dump_data = self.cache_data.get(Const.DATA)
+        if not isinstance(dump_data, dict):
+            logger.warning(f"The dump data({dump_data}) should be a dict.")
+            return
-    def flush_data_when_buffer_is_full(self):
-        if len(self.cache_data[Const.DATA]) >= self.buffer_size:
-            self.write_data_json(self.dump_file_path)
+        key = next(iter(new_data.keys()))
+        if key in dump_data:
+            dump_data.get(key).update(new_data.get(key))
+        else:
+            dump_data.update(new_data)
     def update_stack(self, new_data):
         self.cache_stack.update(new_data)
@@ -75,14 +100,7 @@ class DataWriter:
     def write_data_json(self, file_path):
         logger.info(f"dump.json is at {os.path.dirname(os.path.dirname(file_path))}. ")
-        if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
-            data_to_write = load_json(file_path)
-        else:
-            self.init_json['data_path'] = self.dump_tensor_data_dir
-            data_to_write = self.init_json
-        data_to_write[Const.DATA].update(self.cache_data[Const.DATA])
-        save_json(file_path, data_to_write, indent=1)
-        self.cache_data[Const.DATA].clear()
+        save_json(file_path, self.cache_data, indent=1)
     def write_stack_info_json(self, file_path):
         save_json(file_path, self.cache_stack, indent=1)
@@ -91,6 +109,9 @@ class DataWriter:
         save_json(file_path, self.cache_construct, indent=1)
     def write_json(self):
-        self.write_data_json(self.dump_file_path)
-        self.write_stack_info_json(self.stack_file_path)
-        self.write_construct_info_json(self.construct_file_path)
+        if self.cache_data:
+            self.write_data_json(self.dump_file_path)
+        if self.cache_stack:
+            self.write_stack_info_json(self.stack_file_path)
+        if self.cache_construct:
+            self.write_construct_info_json(self.construct_file_path)

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl

mindstudio-probe 1.0.4py3-none-any.whl → 1.1.1py3-none-any.whl