PyPI - mindstudio-probe - Versions diffs - 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.1 (py3-none-any.whl) → 1.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (177)

{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/METADATA +3 -3
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/RECORD +168 -150
msprobe/README.md +27 -22
msprobe/core/common/const.py +129 -60
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +3 -1
msprobe/core/common/file_utils.py +25 -2
msprobe/core/common/inplace_ops.yaml +1 -0
msprobe/core/common/utils.py +43 -33
msprobe/core/compare/acc_compare.py +43 -74
msprobe/core/compare/check.py +2 -6
msprobe/core/compare/highlight.py +2 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +1 -1
msprobe/core/compare/layer_mapping/layer_mapping.py +2 -1
msprobe/core/compare/merge_result/merge_result.py +16 -9
msprobe/core/compare/merge_result/utils.py +81 -0
msprobe/core/compare/multiprocessing_compute.py +19 -12
msprobe/core/compare/npy_compare.py +30 -12
msprobe/core/compare/utils.py +30 -10
msprobe/core/data_dump/api_registry.py +176 -0
msprobe/core/data_dump/data_collector.py +58 -13
msprobe/core/data_dump/data_processor/base.py +94 -10
msprobe/core/data_dump/data_processor/factory.py +3 -0
msprobe/core/data_dump/data_processor/mindspore_processor.py +33 -33
msprobe/core/data_dump/data_processor/pytorch_processor.py +99 -18
msprobe/core/data_dump/json_writer.py +61 -40
msprobe/core/grad_probe/constant.py +1 -0
msprobe/core/grad_probe/grad_compare.py +1 -1
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/docs/01.installation.md +27 -1
msprobe/docs/02.config_introduction.md +27 -23
msprobe/docs/03.config_examples.md +24 -0
msprobe/docs/05.data_dump_PyTorch.md +103 -16
msprobe/docs/06.data_dump_MindSpore.md +76 -32
msprobe/docs/07.accuracy_checker_PyTorch.md +11 -1
msprobe/docs/08.accuracy_checker_online_PyTorch.md +3 -1
msprobe/docs/09.accuracy_checker_MindSpore.md +5 -3
msprobe/docs/10.accuracy_compare_PyTorch.md +59 -33
msprobe/docs/11.accuracy_compare_MindSpore.md +40 -16
msprobe/docs/12.overflow_check_PyTorch.md +3 -1
msprobe/docs/13.overflow_check_MindSpore.md +4 -2
msprobe/docs/14.data_parse_PyTorch.md +1 -7
msprobe/docs/18.online_dispatch.md +1 -1
msprobe/docs/19.monitor.md +332 -273
msprobe/docs/21.visualization_PyTorch.md +42 -13
msprobe/docs/22.visualization_MindSpore.md +43 -13
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/27.dump_json_instruction.md +301 -27
msprobe/docs/28.debugger_save_instruction.md +94 -0
msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/monitor/step_count_per_record.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +4 -2
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +32 -7
msprobe/mindspore/api_accuracy_checker/api_runner.py +70 -22
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +602 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/compute_element.py +47 -1
msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -1
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +2 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +130 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
msprobe/mindspore/common/const.py +61 -0
msprobe/mindspore/common/utils.py +48 -18
msprobe/mindspore/compare/ms_compare.py +27 -19
msprobe/mindspore/compare/ms_graph_compare.py +6 -5
msprobe/mindspore/debugger/debugger_config.py +31 -6
msprobe/mindspore/debugger/precision_debugger.py +45 -14
msprobe/mindspore/dump/dump_tool_factory.py +5 -3
msprobe/mindspore/dump/hook_cell/api_register.py +142 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +9 -10
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +24 -26
msprobe/mindspore/dump/jit_dump.py +21 -15
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +22 -56
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -1
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +10 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +2 -0
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/monitor/anomaly_detect.py +404 -0
msprobe/mindspore/monitor/distributed/__init__.py +0 -0
msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
msprobe/mindspore/monitor/features.py +63 -0
msprobe/mindspore/monitor/module_hook.py +873 -0
msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
msprobe/mindspore/monitor/utils.py +309 -0
msprobe/mindspore/ms_config.py +8 -2
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/service.py +114 -34
msprobe/pytorch/__init__.py +0 -1
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +12 -7
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +2 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +4 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +5 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +25 -6
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -19
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/bench_functions/apply_adam.py +215 -0
msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
msprobe/pytorch/{parse.py → bench_functions/mish.py} +6 -4
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +50 -0
msprobe/pytorch/bench_functions/sort_v2.py +21 -0
msprobe/pytorch/common/utils.py +97 -4
msprobe/pytorch/debugger/debugger_config.py +19 -9
msprobe/pytorch/debugger/precision_debugger.py +24 -1
msprobe/pytorch/dump/module_dump/module_dump.py +4 -3
msprobe/pytorch/dump/module_dump/module_processer.py +21 -35
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +8 -2
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/hook_module/api_register.py +131 -0
msprobe/pytorch/hook_module/hook_module.py +19 -14
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +173 -75
msprobe/pytorch/monitor/anomaly_detect.py +14 -29
msprobe/pytorch/monitor/csv2tb.py +18 -14
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +238 -193
msprobe/pytorch/monitor/module_metric.py +9 -6
msprobe/pytorch/monitor/optimizer_collect.py +100 -67
msprobe/pytorch/monitor/unittest/test_monitor.py +1 -1
msprobe/pytorch/monitor/utils.py +76 -44
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +9 -0
msprobe/pytorch/online_dispatch/dump_compare.py +3 -0
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +2 -1
msprobe/pytorch/pt_config.py +30 -29
msprobe/pytorch/service.py +114 -32
msprobe/visualization/builder/graph_builder.py +75 -10
msprobe/visualization/builder/msprobe_adapter.py +7 -6
msprobe/visualization/compare/graph_comparator.py +42 -38
msprobe/visualization/compare/mode_adapter.py +0 -19
msprobe/visualization/graph/base_node.py +11 -3
msprobe/visualization/graph/distributed_analyzer.py +71 -3
msprobe/visualization/graph/graph.py +0 -11
msprobe/visualization/graph/node_op.py +4 -3
msprobe/visualization/graph_service.py +4 -5
msprobe/visualization/utils.py +12 -35
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -205
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -75
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/top_level.txt +0 -0

msprobe/mindspore/api_accuracy_checker/compute_element.py CHANGED Viewed

@@ -25,6 +25,7 @@ from msprobe.core.common.file_utils import load_npy
 from msprobe.mindspore.api_accuracy_checker.type_mapping import (api_info_type_str_to_type,
                                                                  ms_dtype_to_dtype_str, torch_dtype_to_dtype_str,
                                                                  dtype_str_to_ms_dtype, dtype_str_to_np_dtype,
+                                                                 dtype_str_to_mindtorch_dtype,
                                                                  dtype_str_to_torch_dtype, type_to_api_info_type_str,
                                                                  DEFAULT_CONSTRUCT_NP_FLOAT_DTYPE, TUPLE_TYPE_STR,
                                                                  MINDSPORE_TENSOR_TYPE_STR, MINDSPORE_DTYPE_TYPE_STR,
@@ -33,6 +34,15 @@ from msprobe.mindspore.api_accuracy_checker.type_mapping import (api_info_type_s
 from msprobe.mindspore.api_accuracy_checker.utils import check_and_get_from_json_dict, global_context
 from msprobe.mindspore.common.log import logger
+import msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer as env_module
+if env_module.is_valid_pt_mt_env:
+    from msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer import mindtorch
+    from msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer import torch
+else:
+    import torch
 class MstensorMetaData:
     def __init__(self, dtype_str, npy_path, maximum, minimum, shape) -> None:
@@ -86,6 +96,37 @@ class ComputeElement:
         torch_tensor = torch.from_numpy(np_ndarray).to(torch_dtype)
         return torch_tensor
+    @staticmethod
+    def transfer_to_mindtorch_tensor(ms_tensor):
+        """
+        Args:
+            ms_tensor: mindspore.Tensor
+        Return:
+            mindtorch_tensor: mindtorch.Tensor
+        """
+        ms_dtype = ms_tensor.dtype
+        dtype_str = ms_dtype_to_dtype_str.get(ms_dtype)
+        if dtype_str not in dtype_str_to_mindtorch_dtype:
+            err_msg = f"ComputeElement.transfer_to_mindtorch_tensor failed: no matching mindtorch dtype for {dtype_str}"
+            logger.error_log_with_exp(err_msg,
+                                      ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
+        else:
+            mindtorch_dtype = dtype_str_to_mindtorch_dtype.get(dtype_str)
+        if dtype_str in int_dtype_str_list:
+            middle_dtype = mindspore.int64
+        else:
+            middle_dtype = mindspore.float64
+        np_ndarray = ms_tensor.astype(middle_dtype).numpy()
+        mindtorch_tensor = mindtorch.from_numpy(np_ndarray).to(ms_dtype)
+        return mindtorch_tensor
     @staticmethod
     def transfer_to_mindspore_tensor(torch_tensor):
         '''
@@ -141,8 +182,11 @@ class ComputeElement:
         elif isinstance(self.parameter, DtypeMetaData):
             if tensor_platform == Const.MS_FRAMEWORK:
                 parameter_tmp = dtype_str_to_ms_dtype.get(self.parameter.dtype_str)
-            else:
+            elif tensor_platform == Const.PT_FRAMEWORK:
                 parameter_tmp = dtype_str_to_torch_dtype.get(self.parameter.dtype_str)
+            elif tensor_platform == Const.MT_FRAMEWORK:
+                parameter_tmp = dtype_str_to_mindtorch_dtype.get(self.parameter.dtype_str)
         elif isinstance(self.parameter, MstensorMetaData):
             mstensor_meta_data = self.parameter
             ms_dtype = dtype_str_to_ms_dtype.get(mstensor_meta_data.dtype_str)
@@ -161,6 +205,8 @@ class ComputeElement:
         # if necessary, do transfer
         if not get_origin and isinstance(parameter_tmp, mindspore.Tensor) and tensor_platform == Const.PT_FRAMEWORK:
             parameter = self.transfer_to_torch_tensor(parameter_tmp)
+        elif not get_origin and isinstance(parameter_tmp, mindspore.Tensor) and tensor_platform == Const.MT_FRAMEWORK:
+            parameter = self.transfer_to_mindtorch_tensor(parameter_tmp)
         elif not get_origin and isinstance(parameter_tmp, torch.Tensor) and tensor_platform == Const.MS_FRAMEWORK:
             parameter = self.transfer_to_mindspore_tensor(parameter_tmp)
         else:

msprobe/mindspore/api_accuracy_checker/data_manager.py CHANGED Viewed

@@ -16,12 +16,13 @@
 import os
 import csv
-from msprobe.core.common.const import Const, CompareConst, MsCompareConst
+from msprobe.core.common.const import Const, CompareConst
 from msprobe.core.common.file_utils import FileOpen, create_directory, write_csv, read_csv
 from msprobe.core.common.utils import add_time_as_suffix, MsprobeBaseException
 from msprobe.mindspore.api_accuracy_checker.base_compare_algorithm import compare_algorithms
 from msprobe.core.common.file_utils import check_file_or_directory_path
 from msprobe.mindspore.common.log import logger
+from msprobe.mindspore.common.const import MsCompareConst
 class ResultCsvEntry:

msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py CHANGED Viewed

@@ -27,10 +27,11 @@ import numpy as np
 from tqdm import tqdm
 # 本地应用/库特定导入
-from msprobe.core.common.const import Const, CompareConst, MsCompareConst
+from msprobe.core.common.const import Const, CompareConst
 from msprobe.mindspore.api_accuracy_checker.api_accuracy_checker import ApiAccuracyChecker, BasicInfoAndStatus
 from msprobe.mindspore.api_accuracy_checker.multi_data_manager import MultiDataManager
 from msprobe.mindspore.common.log import logger
+from msprobe.mindspore.common.const import MsCompareConst
 class MultiApiAccuracyChecker(ApiAccuracyChecker):

msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py ADDED Viewed

@@ -0,0 +1,130 @@
+# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import gc
+import sys
+from pathlib import Path
+import mindspore
+from msprobe.mindspore.common.log import logger
+from msprobe.core.common.const import Const, CompareConst
+from msprobe.mindspore.common.const import MsCompareConst
+import torch as mindtorch
+from torch import Tensor as mindtorch_tensor
+import torch.nn.functional as mindtorch_func
+import torch.distributed as mindtorch_dist
+is_valid_pt_mt_env = True
+def is_mindtorch():
+    mindtorch_check_result = False
+    try:
+        import torch as test_torch
+        from mindspore import Tensor as MindsporeTensor
+    except ImportError:
+        return mindtorch_check_result
+    tensor = test_torch.tensor(0.0)
+    if isinstance(tensor, MindsporeTensor):
+        mindtorch_check_result = True
+    return mindtorch_check_result
+def remove_torch_related_paths():
+    removed_paths = []
+    if not is_mindtorch():
+        return
+    try:
+        import torch as remove_torch
+        torch_file = remove_torch.__file__
+    except ImportError:
+        return
+    torch_dir = os.path.dirname(torch_file)
+    torch_dir_path = Path(torch_dir).resolve()
+    parent_dir = torch_dir_path.parent
+    paths_to_remove = [str(parent_dir)]
+    for path in paths_to_remove:
+        try:
+            path_resolved = str(Path(path).resolve())
+        except Exception as error:
+            logger.debug(f"Failed to resolve path {path}: {error}")
+            continue
+        if path_resolved in sys.path:
+            index = sys.path.index(path_resolved)
+            removed_paths.append((path_resolved, index))
+            sys.path.pop(index)
+    return
+def clear_torch_from_sys_modules():
+    modules_to_remove = []
+    for module in sys.modules:
+        if module == "torch" or module.startswith("torch."):
+            modules_to_remove.append(module)
+    for module in modules_to_remove:
+        del sys.modules[module]
+def set_pt_mt_env_invalid():
+    global is_valid_pt_mt_env
+    is_valid_pt_mt_env = False
+def delete_torch_paths():
+    if not is_mindtorch():
+        set_pt_mt_env_invalid()
+    clear_torch_from_sys_modules()
+    for count_delete_env_path in range(MsCompareConst.MAX_RECURSION_DEPTH):
+        if not is_mindtorch():
+            break
+        remove_torch_related_paths()
+        clear_torch_from_sys_modules()
+        if count_delete_env_path >= MsCompareConst.MAX_RECURSION_DEPTH - 1:
+            raise Exception(f"Please check if you have a valid PyTorch and MindTorch environment, and ensure "
+                            f"the PYTHONPATH environment variable depth does not exceed {Const.MAX_RECURSION_DEPTH}.")
+if not is_mindtorch():
+    set_pt_mt_env_invalid()
+else:
+    initial_sys_path = sys.path.copy()
+    delete_torch_paths()
+    gc.collect()
+    import torch
+    if is_mindtorch():
+        set_pt_mt_env_invalid()
+    sys.path = initial_sys_path

msprobe/mindspore/api_accuracy_checker/type_mapping.py CHANGED Viewed

@@ -15,10 +15,18 @@
 import mindspore
 import numpy as np
-import torch
 from mindspore._c_expression import typing
 from mindspore.common import dtype as mstype
+from msprobe.mindspore.api_accuracy_checker import torch_mindtorch_importer
+if torch_mindtorch_importer.is_valid_pt_mt_env:
+    from msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer import mindtorch
+    from msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer import torch
+else:
+    from msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer import mindtorch
+    import torch
 INT8 = "Int8"
 UINT8 = "UInt8"
 INT16 = "Int16"
@@ -82,6 +90,21 @@ dtype_str_to_torch_dtype = {
 }
 torch_dtype_to_dtype_str = {value: key for key, value in dtype_str_to_torch_dtype.items()}
+dtype_str_to_mindtorch_dtype = {
+    INT8: mindtorch.int8,
+    UINT8: mindtorch.uint8,
+    INT16: mindtorch.int16,
+    INT32: mindtorch.int32,
+    INT64: mindtorch.int64,
+    FLOAT16: mindtorch.float16,
+    FLOAT32: mindtorch.float32,
+    FLOAT64: mindtorch.float64,
+    BOOL: mindtorch.bool,
+    BFLOAT16: mindtorch.bfloat16,
+}
+mindtorch_dtype_to_dtype_str = {value: key for key, value in dtype_str_to_mindtorch_dtype.items()}
 MINDSPORE_TENSOR_TYPE_STR = "mindspore.Tensor"
 BOOL_TYPE_STR = "bool"
 INT_TYPE_STR = "int"

msprobe/mindspore/api_accuracy_checker/utils.py CHANGED Viewed

@@ -82,10 +82,12 @@ class GlobalContext:
     def __init__(self):
         self.is_constructed = True
         self.dump_data_dir = ""
+        self.framework = Const.MS_FRAMEWORK
-    def init(self, is_constructed, dump_data_dir):
+    def init(self, is_constructed, dump_data_dir, framework):
         self.is_constructed = is_constructed
         self.dump_data_dir = dump_data_dir
+        self.framework = framework
     def get_dump_data_dir(self):
         return self.dump_data_dir
@@ -93,5 +95,8 @@ class GlobalContext:
     def get_is_constructed(self):
         return self.is_constructed
+    def get_framework(self):
+        return self.framework
 global_context = GlobalContext()

msprobe/mindspore/common/const.py CHANGED Viewed

@@ -70,6 +70,67 @@ class Const:
     }
+class MsCompareConst:
+    # api_info field
+    MINT = "Mint"
+    MINT_FUNCTIONAL = "MintFunctional"
+    TENSOR_API = "Tensor"
+    FUNCTIONAL_API = "Functional"
+    FUSION_API = "FUSION"
+    API_NAME_STR_LENGTH = 4
+    MAX_RECURSION_DEPTH = 20
+    # Mindtorch api_info field
+    MINDTORCH_TENSOR = "Tensor"
+    MINDTORCH = "Torch"
+    MINDTORCH_FUNC = "Functional"
+    MINDTORCH_NPU = "NPU"
+    MINDTORCH_DIST = "Distributed"
+    MT_VALID_API_TYPES = [
+        MINDTORCH, MINDTORCH_FUNC, MINDTORCH_TENSOR
+    ]
+    SUPPORTED_FUSION_LIST = ["flash_attention_score"]
+    TASK_FIELD = "task"
+    STATISTICS_TASK = "statistics"
+    FRAMEWORK = "framework"
+    TENSOR_TASK = "tensor"
+    DUMP_DATA_DIR_FIELD = "dump_data_dir"
+    DATA_FIELD = "data"
+    # supported api yaml
+    SUPPORTED_API_LIST_FILE = "checker_support_api.yaml"
+    SUPPORTED_TENSOR_LIST_KEY = "tensor"
+    # detail_csv
+    DETAIL_CSV_API_NAME = "API Name"
+    DETAIL_CSV_BENCH_DTYPE = "Bench Dtype"
+    DETAIL_CSV_TESTED_DTYPE = "Tested Dtype"
+    DETAIL_CSV_SHAPE = "Shape"
+    DETAIL_CSV_PASS_STATUS = "Status"
+    DETAIL_CSV_MESSAGE = "Message"
+    DETAIL_CSV_FILE_NAME = "accuracy_checking_details"
+    # result_csv
+    RESULT_CSV_FORWARD_TEST_SUCCESS = "Forward Test Success"
+    RESULT_CSV_BACKWARD_TEST_SUCCESS = "Backward Test Success"
+    RESULT_CSV_FILE_NAME = "accuracy_checking_result"
+    EPSILON = 1e-8
+    class ProcessStatus:
+        SUCCESS = "success"
+        API_NOT_FOUND = "api_not_found"
+        EXCEPTION_SKIP = "exception_skip"
 class FreeBenchmarkConst:
     ADD_NOISE = "add_noise"
     BIT_NOISE = "bit_noise"

msprobe/mindspore/common/utils.py CHANGED Viewed

@@ -25,7 +25,31 @@ from msprobe.core.common.exceptions import DistributedNotInitializedError
 from msprobe.core.common.file_utils import path_len_exceeds_limit, check_path_exists, save_npy
 from msprobe.core.common.log import logger
 from msprobe.core.common.const import Const
-from msprobe.core.common.utils import CompareException, check_seed_all
+from msprobe.core.common.utils import CompareException, check_seed_all, is_save_variable_valid
+class MsprobeStep(ms.train.Callback):
+    def __init__(self, debugger):
+        super(MsprobeStep, self).__init__()
+        self.debugger = debugger
+    def on_train_step_begin(self, run_context):
+        self.debugger.start()
+    def on_train_step_end(self, run_context):
+        self.debugger.stop()
+        self.debugger.step()
+class MsprobeInitStep(ms.train.Callback):
+    def on_train_begin(self, run_context):
+        try:
+            from ms._c_expression import _set_init_iter
+        except ImportError:
+            logger.warning('MsprobeInitStep does not work on this version of MindSpore.')
+            return
+        cb_params = run_context.original_args()
+        _set_init_iter(cb_params.cur_step_num)
 def get_rank_if_initialized():
@@ -93,20 +117,6 @@ def seed_all(seed=1234, mode=False, rm_dropout=True):
         remove_dropout()
-class MsprobeStep(ms.train.Callback):
-    def __init__(self, debugger):
-        super(MsprobeStep, self).__init__()
-        self.debugger = debugger
-    def on_train_step_begin(self, run_context):
-        self.debugger.start()
-    def on_train_step_end(self, run_context):
-        self.debugger.stop()
-        self.debugger.step()
 class Dropout(ops.Dropout):
     def __init__(self, keep_prob=0.5, seed0=0, seed1=1):
         super().__init__(1., seed0, seed1)
@@ -151,11 +161,10 @@ def is_mindtorch():
         mindtorch_check_result = False
         try:
             import torch
-            from mindspore._c_expression import Tensor
         except ImportError:
             return mindtorch_check_result
         tensor = torch.tensor(0.0)
-        if isinstance(tensor, Tensor):
+        if isinstance(tensor, ms.Tensor):
             mindtorch_check_result = True
     return mindtorch_check_result
@@ -170,7 +179,7 @@ def set_register_backward_hook_functions():
         from msprobe.mindspore.mindtorch import (_call_impl,
                                                  register_full_backward_pre_hook,
                                                  register_full_backward_hook)
-        if not hasattr(torch, "register_full_backward_hook"):
+        if not hasattr(torch.nn.Module, "register_full_backward_hook"):
             setattr(torch.nn.Module, "_call_impl", _call_impl)
             setattr(torch.nn.Module, "register_full_backward_pre_hook", register_full_backward_pre_hook)
             setattr(torch.nn.Module, "register_full_backward_hook", register_full_backward_hook)
@@ -179,3 +188,24 @@ def set_register_backward_hook_functions():
     else:
         register_backward_hook_functions["pre"] = ms.nn.Cell.register_backward_pre_hook
         register_backward_hook_functions["full"] = ms.nn.Cell.register_backward_hook
+def check_save_param(variable, name, save_backward):
+    # try catch this api to skip invalid call
+    valid_data_types = tuple([ms.Tensor, int, float, str])
+    if not is_save_variable_valid(variable, valid_data_types):
+        valid_data_types_with_nested_types = valid_data_types + (dict, tuple, list)
+        logger.warning("PrecisionDebugger.save variable type not valid, "
+                       f"should be one of {valid_data_types_with_nested_types}"
+                       "Skip current save process.")
+        raise ValueError
+    if not isinstance(name, str):
+        logger.warning("PrecisionDebugger.save name not valid, "
+                       "should be string. "
+                       "skip current save process.")
+        raise ValueError
+    if not isinstance(save_backward, bool):
+        logger.warning("PrecisionDebugger.save_backward name not valid, "
+                       "should be bool. "
+                       "Skip current save process.")
+        raise ValueError

msprobe/mindspore/compare/ms_compare.py CHANGED Viewed

@@ -22,10 +22,10 @@ import pandas as pd
 from msprobe.core.common.const import CompareConst, Const
 from msprobe.core.common.exceptions import FileCheckException
-from msprobe.core.common.file_utils import FileOpen, create_directory, load_json, load_npy, load_yaml
+from msprobe.core.common.file_utils import create_directory, load_json, load_npy, load_yaml
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, \
-    check_op_str_pattern_valid, get_dump_mode, set_dump_path
+    check_op_str_pattern_valid, get_dump_mode, set_dump_path, detect_framework_by_dump_json
 from msprobe.core.compare.acc_compare import Comparator, ModeConfig
 from msprobe.core.compare.check import dtype_mapping
 from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping
@@ -78,6 +78,11 @@ class MSComparator(Comparator):
             raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got "
                             f"{type(self.data_mapping)}")
+    @staticmethod
+    def process_data_name(result):
+        result['data_name_x'] = result.apply(lambda row: [row['data_name_x'], row['data_name_y']], axis=1)
+        return result
     def calc_accuracy(self, result_df, header):
         condition_no_bench = result_df[CompareConst.BENCH_NAME] == CompareConst.N_A
         result_df[condition_no_bench] = result_df[condition_no_bench].fillna(CompareConst.N_A)
@@ -120,12 +125,13 @@ class MSComparator(Comparator):
             result_df.loc[~condition_md5_equal & ~condition_no_bench, CompareConst.RESULT] = CompareConst.DIFF
         elif self.dump_mode == Const.SUMMARY:
             warning_list = [calc_summary_diff(data_type) for data_type in ['max', 'min', 'mean', 'l2norm']]
-            warning_flag = pd.DataFrame(warning_list).all()
+            warning_flag = pd.DataFrame(warning_list).any()
             result_df.loc[~condition_no_bench, [CompareConst.RESULT, CompareConst.ERROR_MESSAGE]] = ''
             result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING
             result_df.loc[warning_flag, CompareConst.ERROR_MESSAGE] = 'Need double check api accuracy.'
         else:
-            fill_cols = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
+            fill_cols = [CompareConst.COSINE, CompareConst.EUC_DIST,
+                         CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR,
                          CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO,
                          CompareConst.ERROR_MESSAGE]
             result_df.loc[~condition_no_bench, fill_cols] = ''
@@ -139,6 +145,8 @@ class MSComparator(Comparator):
             header.append(CompareConst.STACK)
         if self.dump_mode == Const.ALL:
             header.append(CompareConst.DATA_NAME)
+            result = self.process_data_name(result)
         result.rename(columns={'op_name_x': CompareConst.NPU_NAME,
                                'op_name_y': CompareConst.BENCH_NAME,
                                'dtype_x': CompareConst.NPU_DTYPE,
@@ -169,6 +177,7 @@ class MSComparator(Comparator):
         result[npu_summary] = result['summary_x'].apply(set_summary).tolist()
         result[bench_summary] = result['summary_y'].apply(set_summary).tolist()
         result_df = pd.DataFrame(columns=header)
         for h in header:
             if h in result.columns:
@@ -269,15 +278,15 @@ class MSComparator(Comparator):
             bench_dtype = match_result['dtype_y']
             if self.cross_frame:
                 npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype)
-            return ((npu_dtype == bench_dtype) |
-                    ((npu_dtype == Const.FLOAT16) & (bench_dtype == Const.FLOAT32)) |
-                    ((npu_dtype == Const.FLOAT32) & (bench_dtype == Const.FLOAT16)) |
-                    ((npu_dtype == Const.FLOAT16) & (bench_dtype == Const.BFLOAT16)) |
-                    ((npu_dtype == Const.BFLOAT16) & (bench_dtype == Const.FLOAT16)) |
-                    ((npu_dtype == Const.TORCH_FLOAT16) & (bench_dtype == Const.TORCH_FLOAT32)) |
-                    ((npu_dtype == Const.TORCH_FLOAT32) & (bench_dtype == Const.TORCH_FLOAT16)) |
-                    ((npu_dtype == Const.TORCH_FLOAT16) & (bench_dtype == Const.TORCH_BFLOAT16)) |
-                    ((npu_dtype == Const.TORCH_BFLOAT16) & (bench_dtype == Const.TORCH_FLOAT16)))
+            equal_condition = npu_dtype == bench_dtype
+            match_condition = (
+                    (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin(
+                        CompareConst.DTYPE_MATCH_GROUPS[0])) |
+                    (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin(
+                        CompareConst.DTYPE_MATCH_GROUPS[1]))
+            )
+            return equal_condition | match_condition
         match_result.loc[~gen_dtype_condition(), [i + '_y' for i in bench_df.columns]] = CompareConst.N_A
         return self.make_result_df(match_result)
@@ -382,12 +391,11 @@ class MSComparator(Comparator):
 def check_cross_framework(bench_json_path):
-    pattern = r'"data_name":\s*"[^"]+\.pt"'
-    with FileOpen(bench_json_path, 'r') as file:
-        for line in file:
-            if re.search(pattern, line):
-                return True
-    return False
+    """Report whether the framework detected for the bench dump JSON is Const.PT_FRAMEWORK."""
+    # The comparison already yields the boolean the caller needs.
+    return detect_framework_by_dump_json(bench_json_path) == Const.PT_FRAMEWORK
 def ms_compare(input_param, output_path, **kwargs):

msprobe/mindspore/compare/ms_graph_compare.py CHANGED Viewed

@@ -195,11 +195,12 @@ class GraphMSComparator:
             if not error_flag:
                 result_list, err_msg = compare_ops_apply(n_value, b_value, False, "")
                 result_dict[CompareConst.COSINE] = result_list[0]
-                result_dict[CompareConst.MAX_ABS_ERR] = result_list[1]
-                result_dict[CompareConst.MAX_RELATIVE_ERR] = result_list[2]
-                result_dict[CompareConst.ONE_THOUSANDTH_ERR_RATIO] = result_list[3]
-                result_dict[CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = result_list[4]
-                result_dict[CompareConst.ACCURACY] = check_accuracy(result_list[0], result_list[1])
+                result_dict[CompareConst.EUC_DIST] = result_list[1]
+                result_dict[CompareConst.MAX_ABS_ERR] = result_list[2]
+                result_dict[CompareConst.MAX_RELATIVE_ERR] = result_list[3]
+                result_dict[CompareConst.ONE_THOUSANDTH_ERR_RATIO] = result_list[4]
+                result_dict[CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = result_list[5]
+                result_dict[CompareConst.ACCURACY] = check_accuracy(result_list[0], result_list[2])
                 result_dict[CompareConst.ERROR_MESSAGE] = err_msg
             return pd.Series(result_dict)

msprobe/mindspore/debugger/debugger_config.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -16,9 +16,11 @@
 import os
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.file_utils import create_directory
 from msprobe.mindspore.common.const import Const as MsConst
 from msprobe.mindspore.common.const import FreeBenchmarkConst
+from msprobe.core.common.log import logger
 class DebuggerConfig:
@@ -50,12 +52,14 @@ class DebuggerConfig:
                                  if not task_config.handler_type else task_config.handler_type)
             self.stage = FreeBenchmarkConst.DEFAULT_STAGE if not task_config.fuzz_stage else task_config.fuzz_stage
             if self.handler_type == FreeBenchmarkConst.FIX and \
-               self.pert_type != FreeBenchmarkConst.DEFAULT_PERT_TYPE:
-                raise ValueError("pert_mode must be improve_precision or empty when handler_type is fix, "
-                                 f"but got {self.pert_type}.")
+                    self.pert_type != FreeBenchmarkConst.DEFAULT_PERT_TYPE:
+                logger.error("pert_mode must be improve_precision or empty when handler_type is fix, "
+                             f"but got {self.pert_type}.")
+                raise ValueError
             if self.stage == Const.BACKWARD and self.handler_type == FreeBenchmarkConst.FIX:
-                raise ValueError("handler_type must be check or empty when fuzz_stage is backward, "
-                                 f"but got {self.handler_type}.")
+                logger.error("handler_type must be check or empty when fuzz_stage is backward, "
+                             f"but got {self.handler_type}.")
+                raise ValueError
             self.dump_level = FreeBenchmarkConst.DEFAULT_DUMP_LEVEL
     def check(self):
@@ -72,4 +76,25 @@ class DebuggerConfig:
             self.check_mode = "all"
         if not isinstance(self.async_dump, bool):
             raise Exception("The parameters async_dump should be bool.")
+        if self.async_dump and self.task == Const.TENSOR and not self.list:
+            raise Exception("The parameters async_dump is true in tensor task, the parameters list cannot be empty.")
+        if self.task == Const.STRUCTURE and self.level_ori not in [Const.LEVEL_L0, Const.LEVEL_MIX]:
+            logger.warning_on_rank_0(
+                f"When the task is set to structure, the level should be one of {[Const.LEVEL_L0, Const.LEVEL_MIX]}. "
+                f"If not, the default level is {Const.LEVEL_MIX}."
+            )
+            self.level_ori = Const.LEVEL_MIX
         return True
+    def check_config_with_l2(self):
+        """Validate the settings that are required when the level is L2.
+
+        Returns immediately unless ``level_ori`` equals ``Const.LEVEL_L2``.
+
+        Raises:
+            MsprobeException: with ``INVALID_PARAM_ERROR`` when the task is not
+                ``Const.TENSOR``, when ``scope`` is set, or when ``list`` is
+                empty or does not contain exactly one entry.
+        """
+        if self.level_ori != Const.LEVEL_L2:
+            return
+        # Checks run in a fixed order; only the first failing one is reported.
+        problem = None
+        if self.task != Const.TENSOR:
+            problem = "When level is set to L2, the task must be set to tensor."
+        elif self.scope:
+            problem = "When level is set to L2, the scope cannot be configured."
+        elif not self.list or len(self.list) != 1:
+            problem = "When level is set to L2, the list must be configured as a list with one api name."
+        if problem is not None:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, problem)

mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

mindstudio-probe 1.2.1py3-none-any.whl → 1.3.0py3-none-any.whl