PyPI - mindstudio-probe - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

mindstudio-probe: 1.1.0 (py3-none-any.whl) → 1.1.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220)

{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
msprobe/README.md +39 -3
msprobe/config.json +1 -3
msprobe/core/advisor/advisor.py +8 -3
msprobe/core/common/const.py +113 -13
msprobe/core/common/exceptions.py +25 -3
msprobe/core/common/file_utils.py +150 -26
msprobe/core/common/inplace_op_checker.py +15 -0
msprobe/core/common/log.py +27 -9
msprobe/core/common/utils.py +182 -69
msprobe/core/common_config.py +44 -15
msprobe/core/compare/acc_compare.py +207 -142
msprobe/core/compare/check.py +2 -5
msprobe/core/compare/compare_cli.py +21 -4
msprobe/core/compare/highlight.py +124 -55
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
msprobe/core/compare/npy_compare.py +52 -23
msprobe/core/compare/utils.py +272 -247
msprobe/core/data_dump/data_collector.py +13 -11
msprobe/core/data_dump/data_processor/base.py +46 -16
msprobe/core/data_dump/data_processor/mindspore_processor.py +4 -4
msprobe/core/data_dump/data_processor/pytorch_processor.py +156 -59
msprobe/core/data_dump/scope.py +113 -34
msprobe/core/grad_probe/constant.py +27 -13
msprobe/core/grad_probe/grad_compare.py +18 -1
msprobe/core/grad_probe/utils.py +30 -2
msprobe/core/overflow_check/abnormal_scene.py +185 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +10 -0
msprobe/docs/02.config_introduction.md +49 -22
msprobe/docs/03.config_examples.md +2 -9
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +3 -1
msprobe/docs/06.data_dump_MindSpore.md +157 -90
msprobe/docs/07.accuracy_checker_PyTorch.md +12 -12
msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +19 -13
msprobe/docs/11.accuracy_compare_MindSpore.md +104 -13
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
msprobe/docs/17.grad_probe.md +5 -6
msprobe/docs/19.monitor.md +468 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +386 -0
msprobe/docs/22.visualization_MindSpore.md +384 -0
msprobe/docs/23.tool_function_introduction.md +28 -0
msprobe/docs/FAQ.md +3 -0
msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/mindspore/__init__.py +15 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +113 -145
msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
msprobe/mindspore/api_accuracy_checker/main.py +27 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +33 -12
msprobe/mindspore/common/const.py +33 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +43 -4
msprobe/mindspore/compare/distributed_compare.py +22 -22
msprobe/mindspore/compare/ms_compare.py +271 -248
msprobe/mindspore/compare/ms_graph_compare.py +81 -47
msprobe/mindspore/debugger/debugger_config.py +4 -1
msprobe/mindspore/debugger/precision_debugger.py +7 -1
msprobe/mindspore/dump/dump_tool_factory.py +3 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +12 -2
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +13 -16
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +25 -0
msprobe/mindspore/dump/jit_dump.py +17 -5
msprobe/mindspore/dump/kernel_graph_dump.py +2 -4
msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +145 -39
msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
msprobe/mindspore/free_benchmark/common/utils.py +19 -4
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
msprobe/mindspore/grad_probe/global_context.py +28 -8
msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +24 -10
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/ms_config.py +22 -15
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +2 -4
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +36 -30
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +24 -7
msprobe/pytorch/__init__.py +3 -2
msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +6 -1
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +19 -14
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +13 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +77 -53
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +15 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +100 -6
msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
msprobe/pytorch/bench_functions/swiglu.py +10 -2
msprobe/pytorch/common/parse_json.py +6 -6
msprobe/pytorch/common/utils.py +56 -5
msprobe/pytorch/compare/distributed_compare.py +8 -9
msprobe/pytorch/compare/pt_compare.py +8 -6
msprobe/pytorch/debugger/debugger_config.py +19 -15
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +15 -0
msprobe/pytorch/free_benchmark/common/params.py +8 -1
msprobe/pytorch/free_benchmark/common/utils.py +26 -4
msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -3
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +10 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
msprobe/pytorch/hook_module/wrap_functional.py +14 -12
msprobe/pytorch/module_processer.py +2 -5
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +340 -0
msprobe/pytorch/monitor/distributed/__init__.py +0 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +870 -0
msprobe/pytorch/monitor/module_metric.py +193 -0
msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
msprobe/pytorch/monitor/optimizer_collect.py +295 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
msprobe/pytorch/monitor/utils.py +250 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +29 -38
msprobe/pytorch/online_dispatch/dispatch.py +50 -25
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +53 -32
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
msprobe/pytorch/online_dispatch/utils.py +49 -21
msprobe/pytorch/parse_tool/lib/compare.py +12 -18
msprobe/pytorch/parse_tool/lib/config.py +1 -1
msprobe/pytorch/parse_tool/lib/parse_tool.py +1 -2
msprobe/pytorch/parse_tool/lib/utils.py +16 -35
msprobe/pytorch/parse_tool/lib/visualization.py +2 -0
msprobe/pytorch/pt_config.py +31 -8
msprobe/pytorch/service.py +15 -5
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +165 -0
msprobe/visualization/builder/msprobe_adapter.py +205 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +130 -0
msprobe/visualization/compare/mode_adapter.py +211 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +124 -0
msprobe/visualization/graph/graph.py +200 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +214 -0
msprobe/visualization/utils.py +232 -0
mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
msprobe/docs/04.acl_config_examples.md +0 -78
msprobe/mindspore/compare/layer_mapping.py +0 -146
msprobe/mindspore/compare/modify_mapping.py +0 -107
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0

msprobe/mindspore/compare/ms_graph_compare.py CHANGED Viewed

@@ -1,12 +1,27 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import copy
-import csv
 import glob
 import os
+import re
 import numpy as np
 import pandas as pd
-from msprobe.core.common.const import CompareConst, GraphMode, Const, FileCheckConst
-from msprobe.core.common.file_utils import FileOpen, check_path_before_create, change_mode, load_npy
+from msprobe.core.common.const import CompareConst, GraphMode, Const
+from msprobe.core.common.file_utils import load_npy, read_csv, save_excel
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import add_time_with_xlsx, CompareException
 from msprobe.core.compare.multiprocessing_compute import _ms_graph_handle_multi_process, check_accuracy
@@ -14,7 +29,7 @@ from msprobe.core.compare.npy_compare import npy_data_check, statistics_data_che
 from msprobe.mindspore.common.utils import convert_to_int, list_lowest_level_directories
-class row_data:
+class RowData:
     def __init__(self, mode):
         self.basic_data = copy.deepcopy(CompareConst.MS_GRAPH_BASE)
         self.npy_data = copy.deepcopy(CompareConst.MS_GRAPH_NPY)
@@ -28,17 +43,34 @@ class row_data:
         return self.data
+def get_name_dict(name: str) -> dict:
+    compare_pattern = re.compile(r'^([^.]+)\.([^.]+)\.([^.]+)\.([^.]+)\.(\d+(?:\.\d+)*)\.'
+                                 r'((?:in|out)put(?:\.\d+)*)\.([^.]+)\.([^.]+)\.npy$')
+    match = compare_pattern.match(name)
+    if match:
+        return {'op_type': match.group(1),
+                'op_name': match.group(2),
+                'task_id': match.group(3),
+                'stream_id': match.group(4),
+                'timestamp': match.group(5).split(Const.SEP)[0],
+                'input_output_index': match.group(6),
+                'slot': match.group(7),
+                'format': match.group(8)}
+    return {}
 def npy_data_read(data_path, npy_file_list, mapping_dict):
     data_list = []
+    compare_key_elements = ['op_name', 'task_id', 'input_output_index', 'slot']
     for data in npy_file_list:
         if data in mapping_dict:
-            split_list = mapping_dict[data].split(Const.SEP)
+            name_dict = get_name_dict(mapping_dict[data])
         else:
-            split_list = data.split(Const.SEP)
-        if len(split_list) < 7:
+            name_dict = get_name_dict(data)
+        if not name_dict:
             continue
-        compare_key = f"{split_list[1]}.{split_list[2]}.{split_list[3]}.{split_list[5]}.{split_list[6]}"
-        timestamp = convert_to_int(split_list[4])
+        compare_key = Const.SEP.join([name_dict.get(element) for element in compare_key_elements])
+        timestamp = convert_to_int(name_dict.get('timestamp'))
         data_list.append([os.path.join(data_path, data), compare_key, timestamp])
     return data_list
@@ -48,18 +80,17 @@ def statistic_data_read(statistic_file_list, statistic_file_path):
     data_list = []
     statistic_data_list = []
     header_index = {
-        'Data Type': None, 'Shape': None, 'Max Value': None,
-        'Min Value': None,'Avg Value': None, 'L2Norm Value': None
+        'Data Type': None, 'Shape': None, 'Max Value': None,
+        'Min Value': None, 'Avg Value': None, 'L2Norm Value': None
     }
     for statistic_file in statistic_file_list:
-        with FileOpen(statistic_file, "r") as f:
-            csv_reader = csv.reader(f, delimiter=",")
-            header = next(csv_reader)
-            for key in header_index.keys():
-                for index, value in enumerate(header):
-                    if key == value:
-                        header_index[key] = index
-            statistic_data_list.extend([row for row in csv_reader])
+        content = read_csv(statistic_file, as_pd=False)
+        header = content[0]
+        for key in header_index.keys():
+            for index, value in enumerate(header):
+                if key == value:
+                    header_index[key] = index
+        statistic_data_list.extend(content[1:])
     for key in header_index.keys():
         if header_index[key] is None:
@@ -97,11 +128,9 @@ def generate_data_name(data_path):
     mapping_dict = {}
     if mapping_exist:
         for mapping_file in mapping_file_list:
-            with FileOpen(mapping_file, "r") as f:
-                csv_reader = csv.reader(f, delimiter=",")
-                header = next(csv_reader)
-                for row in csv_reader:
-                    mapping_dict[row[0]] = row[1]
+            content = read_csv(mapping_file, False)
+            for row in content[1:]:
+                mapping_dict[row[0]] = row[1]
     if npy_exist:
         data_list = npy_data_read(data_path, npy_file_list, mapping_dict)
@@ -136,7 +165,7 @@ class GraphMSComparator:
     def compare_ops(compare_result_db, mode):
         def npy_mode_compute(row):
-            result_dict = row_data(GraphMode.NPY_MODE)()
+            result_dict = RowData(GraphMode.NPY_MODE)()
             def process_npy_file(file_path, name_prefix, result):
                 if os.path.exists(file_path):
@@ -171,7 +200,7 @@ class GraphMSComparator:
             return pd.Series(result_dict)
         def statistic_mode_compute(row):
-            result_dict = row_data('STATISTIC')()
+            result_dict = RowData('STATISTIC')()
             def update_result_dict(result, rows, prefix):
                 result[f'{prefix} Name'] = rows[f'{prefix} Name']
@@ -198,24 +227,30 @@ class GraphMSComparator:
                     result_dict[CompareConst.NPU_NORM] - result_dict[CompareConst.BENCH_NORM])
                 result_dict[CompareConst.MAX_RELATIVE_ERR] = result_dict[CompareConst.MAX_DIFF] / result_dict[
                     CompareConst.BENCH_MAX] if result_dict[CompareConst.BENCH_MAX] > 0 else 0
-                result_dict[CompareConst.MAX_RELATIVE_ERR] = str(result_dict[CompareConst.MAX_RELATIVE_ERR] * 100) + "%"
+                if not np.isnan(result_dict[CompareConst.MAX_RELATIVE_ERR]):
+                    result_dict[CompareConst.MAX_RELATIVE_ERR] = str(
+                        result_dict[CompareConst.MAX_RELATIVE_ERR] * 100) + "%"
                 result_dict[CompareConst.MIN_RELATIVE_ERR] = result_dict[CompareConst.MIN_DIFF] / result_dict[
                     CompareConst.BENCH_MIN] if result_dict[CompareConst.BENCH_MIN] > 0 else 0
-                result_dict[CompareConst.MIN_RELATIVE_ERR] = str(result_dict[CompareConst.MIN_RELATIVE_ERR] * 100) + "%"
+                if not np.isnan(result_dict[CompareConst.MIN_RELATIVE_ERR]):
+                    result_dict[CompareConst.MIN_RELATIVE_ERR] = \
+                        str(result_dict[CompareConst.MIN_RELATIVE_ERR] * 100) + "%"
                 result_dict[CompareConst.MEAN_RELATIVE_ERR] = result_dict[CompareConst.MEAN_DIFF] / result_dict[
                     CompareConst.BENCH_MEAN] if result_dict[CompareConst.BENCH_MEAN] > 0 else 0
-                result_dict[CompareConst.MEAN_RELATIVE_ERR] = str(
-                    result_dict[CompareConst.MEAN_RELATIVE_ERR] * 100) + "%"
+                if not np.isnan(result_dict[CompareConst.MEAN_RELATIVE_ERR]):
+                    result_dict[CompareConst.MEAN_RELATIVE_ERR] = str(
+                        result_dict[CompareConst.MEAN_RELATIVE_ERR] * 100) + "%"
                 result_dict[CompareConst.NORM_RELATIVE_ERR] = result_dict[CompareConst.NORM_DIFF] / result_dict[
                     CompareConst.BENCH_NORM] if result_dict[CompareConst.BENCH_NORM] > 0 else 0
-                result_dict[CompareConst.NORM_RELATIVE_ERR] = str(
-                    result_dict[CompareConst.NORM_RELATIVE_ERR] * 100) + "%"
+                if not np.isnan(result_dict[CompareConst.NORM_RELATIVE_ERR]):
+                    result_dict[CompareConst.NORM_RELATIVE_ERR] = str(
+                        result_dict[CompareConst.NORM_RELATIVE_ERR] * 100) + "%"
                 magnitude_diff = result_dict[CompareConst.MAX_DIFF] / (
                         max(result_dict[CompareConst.NPU_MAX], result_dict[CompareConst.BENCH_MAX]) + 1e-10)
-                if magnitude_diff > CompareConst.MAGNITUDE:
-                    result_dict[CompareConst.ACCURACY] = 'No'
-                else:
-                    result_dict[CompareConst.ACCURACY] = 'Yes'
+                if np.isnan(result_dict[CompareConst.NPU_MAX]) and np.isnan(result_dict[CompareConst.BENCH_MAX]):
+                    magnitude_diff = 0
+                result_dict[CompareConst.ACCURACY] = CompareConst.YES if \
+                    magnitude_diff <= CompareConst.MAGNITUDE else CompareConst.NO
             return pd.Series(result_dict)
@@ -238,24 +273,23 @@ class GraphMSComparator:
                 is_empty = True
             if is_empty or not mode:
                 continue
-            compare_result_df = self._do_multi_process(compare_result_df, mode)
+            compare_result_df = self.do_multi_process(compare_result_df, mode)
             compare_result_name = add_time_with_xlsx(f"compare_result_{str(rank_id)}_{str(step_id)}")
             compare_result_path = os.path.join(os.path.realpath(self.output_path), f"{compare_result_name}")
-            check_path_before_create(compare_result_path)
             self.to_excel(compare_result_df, compare_result_path)
             logger.info(f"Compare rank: {rank_id} step: {step_id} finish. Compare result: {compare_result_path}.")
     def to_excel(self, compare_result_df: pd.DataFrame, compare_result_path: str, slice_num=0, need_slice=False) -> int:
         size = len(compare_result_df)
         # sheet size cannot be larger than 1048576
         if size < CompareConst.MAX_EXCEL_LENGTH:
-            compare_result_path = compare_result_path.replace('.xlsx', f'_slice_{slice_num}.xlsx') if need_slice else compare_result_path
-            compare_result_df.to_excel(compare_result_path, index=False)
-            change_mode(compare_result_path, FileCheckConst.DATA_FILE_AUTHORITY)
+            compare_result_path = compare_result_path.replace('.xlsx', f'_slice_{slice_num}.xlsx') if \
+                need_slice else compare_result_path
+            save_excel(compare_result_path, compare_result_df)
             return slice_num + 1
         else:
-            slice_num = self.to_excel(compare_result_df.iloc[0: size//2], compare_result_path, slice_num, True)
-            return self.to_excel(compare_result_df.iloc[size//2:], compare_result_path, slice_num, True)
+            slice_num = self.to_excel(compare_result_df.iloc[0: size // 2], compare_result_path, slice_num, True)
+            return self.to_excel(compare_result_df.iloc[size // 2:], compare_result_path, slice_num, True)
     def compare_process(self, rank_id, step_id):
         # generate data_path
@@ -303,8 +337,8 @@ class GraphMSComparator:
             npu_data_df[npu_float_type] = npu_data_df[npu_float_type].astype(float)
             bench_float_type = [
-                CompareConst.BENCH_MAX, CompareConst.BENCH_MIN,
-                CompareConst.BENCH_MEAN,CompareConst.BENCH_NORM
+                CompareConst.BENCH_MAX, CompareConst.BENCH_MIN,
+                CompareConst.BENCH_MEAN, CompareConst.BENCH_NORM
             ]
             bench_data_df[bench_float_type] = bench_data_df[bench_float_type].astype(float)
@@ -355,7 +389,7 @@ class GraphMSComparator:
                 rank_step_path_dict[rank_step_key] = [dir_path]
         return dict(sorted(rank_step_path_dict.items()))
-    def _do_multi_process(self, result_df, mode):
+    def do_multi_process(self, result_df, mode):
         try:
             result_df = _ms_graph_handle_multi_process(self.compare_ops, result_df, mode)
         except ValueError as e:

msprobe/mindspore/debugger/debugger_config.py CHANGED Viewed

@@ -33,7 +33,7 @@ class DebuggerConfig:
         self.level_ori = common_config.level
         self.list = [] if not task_config.list else task_config.list
         self.scope = [] if not task_config.scope else task_config.scope
-        self.data_mode = [] if not task_config.data_mode else task_config.data_mode
+        self.data_mode = [Const.ALL] if not task_config.data_mode else task_config.data_mode
         self.file_format = task_config.file_format
         self.overflow_nums = 1 if not task_config.overflow_nums else task_config.overflow_nums
         self.check_mode = task_config.check_mode
@@ -52,6 +52,9 @@ class DebuggerConfig:
                self.pert_type != FreeBenchmarkConst.DEFAULT_PERT_TYPE:
                 raise ValueError("pert_mode must be improve_precision or empty when handler_type is fix, "
                                  f"but got {self.pert_type}.")
+            if self.stage == Const.BACKWARD and self.handler_type == FreeBenchmarkConst.FIX:
+                raise ValueError("handler_type must be check or empty when fuzz_stage is backward, "
+                                 f"but got {self.handler_type}.")
             self.dump_level = FreeBenchmarkConst.DEFAULT_DUMP_LEVEL
     def check(self):

msprobe/mindspore/debugger/precision_debugger.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
-# Licensed under the Apache License, Version 2.0  (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -14,13 +14,16 @@
 # limitations under the License.
 import os
+from collections import defaultdict
 import mindspore as ms
 from mindspore._c_expression import MSContext
 from msprobe.core.common.const import Const, MsgConst
+from msprobe.mindspore.cell_processor import CellProcessor
 from msprobe.mindspore.common.const import Const as MsConst
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
+from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
 from msprobe.mindspore.grad_probe.grad_monitor import GradientMonitor
 from msprobe.mindspore.ms_config import parse_json_config
 from msprobe.mindspore.runtime import Runtime
@@ -128,6 +131,9 @@ class PrecisionDebugger:
             return
         if instance.service:
             instance.service.step()
+        HOOKCell.cell_count = defaultdict(int)
+        CellProcessor.reset_cell_stats()
         Runtime.step_count += 1
     @classmethod

msprobe/mindspore/dump/dump_tool_factory.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
-# Licensed under the Apache License, Version 2.0  (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -40,6 +40,8 @@ class DumpToolFactory:
     @staticmethod
     def create(config: DebuggerConfig):
+        if len(config.data_mode) != 1 or config.data_mode[0] not in Const.GRAPH_DATA_MODE_LIST:
+            raise Exception("data_mode must be one of all, input, output.")
         tool = DumpToolFactory.tools.get(config.level)
         if not tool:
             raise Exception("Valid level is needed.")

msprobe/mindspore/dump/hook_cell/api_registry.py CHANGED Viewed

@@ -24,6 +24,12 @@ from msprobe.mindspore.dump.hook_cell.wrap_api import (HOOKTensor, HOOKStubTenso
 from msprobe.core.common.utils import Const
+def stub_method(method):
+    def wrapped_method(*args, **kwargs):
+        return method(*args, **kwargs)
+    return wrapped_method
 class ApiRegistry:
     def __init__(self):
         self.tensor_ori_attr = {}
@@ -50,9 +56,13 @@ class ApiRegistry:
             if Const.SEP in api:
                 sub_module_name, sub_op = api.rsplit(Const.SEP, 1)
                 sub_module = getattr(ori_api_group, sub_module_name)
-                api_ori_attr[api] = getattr(sub_module, sub_op)
+                ori_api_func = getattr(sub_module, sub_op)
             else:
-                api_ori_attr[api] = getattr(ori_api_group, api)
+                ori_api_func = getattr(ori_api_group, api)
+            if ori_api_group == StubTensor:
+                api_ori_attr[api] = stub_method(ori_api_func)
+                continue
+            api_ori_attr[api] = ori_api_func
     @staticmethod
     def set_api_attr(api_group, attr_dict):

msprobe/mindspore/dump/hook_cell/primitive_hooks.py CHANGED Viewed

@@ -1,4 +1,5 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,18 +12,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ============================================================================
 import os
-import mindspore as ms
-from mindspore.common.tensor import Tensor
 from mindspore import ops
+from mindspore.common.tensor import Tensor
-from msprobe.mindspore.common.log import logger
 from msprobe.core.common.utils import Const, DumpException
-from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs, \
-    ModuleBackwardInputs, ModuleBackwardOutputs
+from msprobe.core.data_dump.data_processor.base import (ModuleBackwardInputs, ModuleBackwardOutputs,
+                                                        ModuleForwardInputsOutputs)
+from msprobe.mindspore.common.log import logger
 class PrimitiveHookService:
@@ -41,6 +40,7 @@ class PrimitiveHookService:
         Returns:
             callable: 包装后的 primitive 函数。
         """
         def create_backward_hook(captured_grads, num_tensors, updated_primitive_name, hook_type):
             """
             创建反向 hook 函数，用于捕获梯度。
@@ -54,26 +54,24 @@ class PrimitiveHookService:
             Returns:
                 callable: 反向 hook 函数。
             """
-            def backward_hook(grad):
-                captured_grads.append(grad)
+            def backward_hook(grad):
+                captured_grads.extend(grad)
                 backward_primitive_name = f"{updated_primitive_name}{Const.SEP}{Const.BACKWARD}"
                 try:
-                    if len(captured_grads) == num_tensors and hook_type == Const.INPUT:
+                    if hook_type == Const.INPUT:
                         self.service_instance.data_collector.update_api_or_module_name(backward_primitive_name)
                         new_module_input_output = ModuleBackwardOutputs(grad_output=tuple(captured_grads))
                         self.service_instance.data_collector.backward_output_data_collect(
                             backward_primitive_name, self, os.getpid(), new_module_input_output
                         )
-                        captured_grads.clear()
-                    elif len(captured_grads) == num_tensors and hook_type == Const.OUTPUT:
+                    elif hook_type == Const.OUTPUT:
                         self.service_instance.data_collector.update_api_or_module_name(backward_primitive_name)
                         new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads))
                         self.service_instance.data_collector.backward_input_data_collect(
                             backward_primitive_name, self, os.getpid(), new_module_input_output
                         )
-                        captured_grads.clear()
                 except Exception as exception:
                     logger.error(f"This is a primitive op {hook_type}_backward dump error: {exception}, "
@@ -104,7 +102,7 @@ class PrimitiveHookService:
                     hooked_inputs.append(arg_hooked)
                 else:
                     hooked_inputs.append(arg)
-            return hooked_inputs
+            return tuple(hooked_inputs)
         def hook_primitive_outputs(out, captured_grads_output, updated_primitive_name):
             """
@@ -178,7 +176,7 @@ class PrimitiveHookService:
                 module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out)
                 try:
                     self.service_instance.data_collector.forward_data_collect(forward_primitive_name, instance_self,
-                                                             os.getpid(), module_input_output)
+                                                                              os.getpid(), module_input_output)
                 except Exception as exception:
                     logger.error(f"This is a primitive op dump error during forward data collection: {exception}, "
                                  f"primitive_name: {primitive_name}")
@@ -203,4 +201,3 @@ class PrimitiveHookService:
             self.primitive_counters[primitive_name] = 0
         else:
             self.primitive_counters[primitive_name] += 1

msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml CHANGED Viewed

@@ -490,6 +490,31 @@ ops:
   - scatter_update
   - derivative
   - jet
+  - row_stack
+  - gather
+  - arange
+  - cond
+  - slice_scatter
+  - clip_by_norm
+  - eps
+  - layer_norm
+  - cast
+  - numel
+  - permute
+  - select_scatter
+  - group_norm
+  - eq
+  - embedding
+  - ones_like
+  - zeros
+  - nanmean
+  - shape
+  - zeros_like
+  - ones
+  - diagonal_scatter
+  - vander
+  - is_nonzero
+  - rotary_position_embedding
 tensor:
   - __abs__

msprobe/mindspore/dump/jit_dump.py CHANGED Viewed

@@ -20,7 +20,7 @@ from mindspore import Tensor
 from mindspore._c_expression import PyNativeExecutor_
 from mindspore.common.api import _MindsporeFunctionExecutor
-from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+from msprobe.core.common.log import logger
 from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs
 from msprobe.core.common.const import Const
 from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs
@@ -33,6 +33,8 @@ def dump_jit(name, in_feat, out_feat, is_forward):
     index = ori_args.find("<")
     if index != 0 and index != -1:
         result = ori_args[0:index]
+    elif name is not None and "<" not in str(name):
+        result = str(name)
     else:
         result = "JitFunction"
     if JitDump.need_dump():
@@ -47,7 +49,7 @@ def dump_jit(name, in_feat, out_feat, is_forward):
             name_template = Const.JIT + Const.SEP + result + Const.SEP + str(JitDump.jit_count[result]) + Const.SEP + \
                             Const.BACKWARD
             JitDump.data_collector.update_api_or_module_name(name_template)
-            module_input_output = ModuleBackwardInputsOutputs(grad_input=in_feat ,grad_output=out_feat)
+            module_input_output = ModuleBackwardInputsOutputs(grad_input=in_feat, grad_output=out_feat)
             JitDump.data_collector.backward_data_collect(name_template, None, pid, module_input_output)
@@ -59,15 +61,25 @@ class JitDump(_MindsporeFunctionExecutor):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.name = None
+        if len(args) > 0:
+            self.name = args[0].__name__
         self._executor = PyNativeExecutor_.get_instance()
     def __call__(self, *args, **kwargs):
-        api_register.api_set_ori_func()
+        if JitDump.jit_dump_switch:
+            api_register.api_set_ori_func()
         out = super().__call__(*args, **kwargs)
         if JitDump.jit_dump_switch and len(args) > 0:
-            dump_jit(args[0], args, out, True)
+            if self.name and self.name != "construct":
+                dump_jit(self.name, args, out, True)
+            else:
+                dump_jit(args[0], args, out, True)
             JitDump.jit_enable = True
-        api_register.api_set_hook_func()
+        elif len(args) == 0:
+            logger.warning(f"The jit function {self.name} has no input arguments, nothing will be dumped.")
+        if JitDump.jit_dump_switch:
+            api_register.api_set_hook_func()
         return out
     @classmethod

msprobe/mindspore/dump/kernel_graph_dump.py CHANGED Viewed

@@ -13,10 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import os
-from msprobe.core.common.file_utils import FileOpen, create_directory
+from msprobe.core.common.file_utils import create_directory, save_json
 from msprobe.mindspore.common.log import logger
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
@@ -62,8 +61,7 @@ class KernelGraphDump:
         json_path = self.dump_json["common_dump_settings"]["path"]
         create_directory(json_path)
         json_path = os.path.join(json_path, "kernel_graph_dump.json")
-        with FileOpen(json_path, 'w') as f:
-            json.dump(self.dump_json, f)
+        save_json(json_path, self.dump_json, indent=4)
         logger.info(json_path + " has been created.")
         os.environ["MINDSPORE_DUMP_CONFIG"] = json_path
         if self.dump_json["common_dump_settings"]["dump_mode"] == 0:

msprobe/mindspore/dump/kernel_kbyk_dump.py CHANGED Viewed

@@ -13,11 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import os
 from msprobe.core.common.const import Const
-from msprobe.core.common.file_utils import FileOpen, create_directory
+from msprobe.core.common.file_utils import create_directory, save_json
 from msprobe.mindspore.common.log import logger
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
@@ -70,8 +69,7 @@ class KernelKbykDump:
         json_path = self.dump_json[KernelKbykDump.COMMON_SETTINGS]["path"]
         create_directory(json_path)
         json_path = os.path.join(json_path, "kernel_kbyk_dump.json")
-        with FileOpen(json_path, 'w') as f:
-            json.dump(self.dump_json, f)
+        save_json(json_path, self.dump_json, indent=4)
         logger.info(json_path + " has been created.")
         os.environ["MINDSPORE_DUMP_CONFIG"] = json_path

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl