PyPI - mindstudio-probe - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194)

{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +1 -1
mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
msprobe/README.md +46 -16
msprobe/__init__.py +16 -1
msprobe/config.json +0 -2
msprobe/core/advisor/advisor.py +8 -8
msprobe/core/advisor/advisor_const.py +6 -7
msprobe/core/advisor/advisor_result.py +12 -12
msprobe/core/common/const.py +64 -3
msprobe/core/common/exceptions.py +2 -2
msprobe/core/common/file_utils.py +54 -9
msprobe/core/common/inplace_op_checker.py +38 -0
msprobe/core/common/inplace_ops.yaml +251 -0
msprobe/core/common/log.py +21 -11
msprobe/core/common/utils.py +153 -167
msprobe/core/common_config.py +18 -25
msprobe/core/compare/acc_compare.py +209 -36
msprobe/core/compare/check.py +102 -17
msprobe/core/compare/compare_cli.py +21 -1
msprobe/core/compare/highlight.py +41 -5
msprobe/core/compare/multiprocessing_compute.py +33 -8
msprobe/core/compare/npy_compare.py +21 -6
msprobe/core/compare/utils.py +82 -48
msprobe/core/data_dump/data_collector.py +31 -32
msprobe/core/data_dump/data_processor/base.py +45 -22
msprobe/core/data_dump/data_processor/factory.py +20 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +11 -5
msprobe/core/data_dump/data_processor/pytorch_processor.py +24 -7
msprobe/core/data_dump/json_writer.py +63 -42
msprobe/core/data_dump/scope.py +32 -16
msprobe/core/grad_probe/constant.py +4 -0
msprobe/core/grad_probe/grad_compare.py +2 -3
msprobe/core/grad_probe/utils.py +16 -3
msprobe/docs/01.installation.md +19 -9
msprobe/docs/02.config_introduction.md +52 -80
msprobe/docs/03.config_examples.md +3 -13
msprobe/docs/04.acl_config_examples.md +11 -9
msprobe/docs/05.data_dump_PyTorch.md +140 -12
msprobe/docs/06.data_dump_MindSpore.md +47 -5
msprobe/docs/07.accuracy_checker_PyTorch.md +57 -34
msprobe/docs/08.accuracy_checker_online_PyTorch.md +51 -11
msprobe/docs/09.accuracy_checker_MindSpore.md +8 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +181 -99
msprobe/docs/11.accuracy_compare_MindSpore.md +162 -31
msprobe/docs/13.overflow_check_MindSpore.md +1 -1
msprobe/docs/15.free_benchmarking_PyTorch.md +59 -53
msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
msprobe/docs/17.grad_probe.md +14 -16
msprobe/docs/18.online_dispatch.md +89 -0
msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +22 -10
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/mindspore/__init__.py +1 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +35 -11
msprobe/mindspore/api_accuracy_checker/api_info.py +7 -0
msprobe/mindspore/cell_processor.py +27 -3
msprobe/mindspore/common/const.py +2 -0
msprobe/mindspore/common/utils.py +18 -2
msprobe/mindspore/compare/distributed_compare.py +9 -22
msprobe/mindspore/compare/layer_mapping.py +146 -0
msprobe/mindspore/compare/modify_mapping.py +107 -0
msprobe/mindspore/compare/ms_compare.py +173 -35
msprobe/mindspore/compare/ms_graph_compare.py +27 -11
msprobe/mindspore/debugger/debugger_config.py +16 -13
msprobe/mindspore/debugger/precision_debugger.py +37 -13
msprobe/mindspore/dump/dump_tool_factory.py +16 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +11 -1
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +82 -10
msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
msprobe/mindspore/dump/jit_dump.py +41 -17
msprobe/mindspore/dump/kernel_graph_dump.py +19 -3
msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -4
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +19 -4
msprobe/mindspore/free_benchmark/common/config.py +15 -0
msprobe/mindspore/free_benchmark/common/handler_params.py +15 -0
msprobe/mindspore/free_benchmark/common/utils.py +19 -5
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +16 -2
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +18 -3
msprobe/mindspore/free_benchmark/handler/base_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/fix_handler.py +15 -0
msprobe/mindspore/free_benchmark/handler/handler_factory.py +18 -3
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +22 -7
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -0
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +22 -7
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +44 -18
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +18 -4
msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +20 -5
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +15 -0
msprobe/mindspore/grad_probe/global_context.py +18 -8
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -4
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
msprobe/mindspore/service.py +42 -123
msprobe/pytorch/__init__.py +20 -1
msprobe/pytorch/api_accuracy_checker/common/config.py +19 -2
msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +47 -21
msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +67 -32
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +26 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +19 -2
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +51 -125
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +146 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +21 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +78 -33
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +36 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
msprobe/pytorch/bench_functions/__init__.py +18 -3
msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
msprobe/pytorch/bench_functions/confusion_transpose.py +15 -0
msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
msprobe/pytorch/bench_functions/linear.py +15 -0
msprobe/pytorch/bench_functions/matmul_backward.py +21 -6
msprobe/pytorch/bench_functions/npu_fusion_attention.py +180 -151
msprobe/pytorch/bench_functions/rms_norm.py +15 -0
msprobe/pytorch/bench_functions/rotary_mul.py +28 -9
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
msprobe/pytorch/bench_functions/swiglu.py +20 -5
msprobe/pytorch/common/__init__.py +15 -0
msprobe/pytorch/common/log.py +18 -6
msprobe/pytorch/common/parse_json.py +26 -11
msprobe/pytorch/common/utils.py +40 -35
msprobe/pytorch/compare/distributed_compare.py +11 -11
msprobe/pytorch/compare/match.py +15 -0
msprobe/pytorch/compare/pt_compare.py +38 -6
msprobe/pytorch/debugger/debugger_config.py +52 -39
msprobe/pytorch/debugger/precision_debugger.py +72 -24
msprobe/pytorch/free_benchmark/__init__.py +20 -5
msprobe/pytorch/free_benchmark/common/enums.py +28 -0
msprobe/pytorch/free_benchmark/common/params.py +15 -0
msprobe/pytorch/free_benchmark/common/utils.py +17 -1
msprobe/pytorch/free_benchmark/compare/grad_saver.py +28 -7
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +15 -0
msprobe/pytorch/free_benchmark/main.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +26 -2
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +55 -16
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
msprobe/pytorch/function_factory.py +17 -2
msprobe/pytorch/functional/module_dump.py +84 -0
msprobe/pytorch/grad_probe/grad_stat_csv.py +2 -2
msprobe/pytorch/hook_module/__init__.py +16 -1
msprobe/pytorch/hook_module/api_registry.py +13 -8
msprobe/pytorch/hook_module/hook_module.py +17 -19
msprobe/pytorch/hook_module/utils.py +4 -6
msprobe/pytorch/hook_module/wrap_aten.py +12 -11
msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
msprobe/pytorch/hook_module/wrap_functional.py +10 -11
msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
msprobe/pytorch/hook_module/wrap_torch.py +4 -6
msprobe/pytorch/hook_module/wrap_vf.py +4 -6
msprobe/pytorch/module_processer.py +17 -2
msprobe/pytorch/online_dispatch/compare.py +11 -12
msprobe/pytorch/online_dispatch/single_compare.py +7 -7
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +8 -0
msprobe/pytorch/online_dispatch/utils.py +1 -4
msprobe/pytorch/parse.py +15 -0
msprobe/pytorch/parse_tool/cli.py +5 -6
msprobe/pytorch/parse_tool/lib/compare.py +9 -10
msprobe/pytorch/parse_tool/lib/parse_tool.py +3 -0
msprobe/pytorch/parse_tool/lib/utils.py +28 -24
msprobe/pytorch/parse_tool/lib/visualization.py +1 -1
msprobe/pytorch/pt_config.py +167 -38
msprobe/pytorch/service.py +97 -32
mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
msprobe/pytorch/functional/data_processor.py +0 -0
msprobe/pytorch/functional/dump_module.py +0 -39
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0

msprobe/pytorch/common/__init__.py CHANGED Viewed

@@ -1,2 +1,17 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from .parse_json import parse_json_info_forward_backward
 from .utils import seed_all

msprobe/pytorch/common/log.py CHANGED Viewed

@@ -1,9 +1,21 @@
-import os
-import time
-import sys
-from msprobe.pytorch.common.utils import get_rank_if_initialized
-from msprobe.core.common.log import BaseLogger
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from msprobe.core.common.exceptions import DistributedNotInitializedError
+from msprobe.core.common.log import BaseLogger
+from msprobe.pytorch.common.utils import get_rank_if_initialized
 class PyTorchLogger(BaseLogger):
@@ -18,4 +30,4 @@ class PyTorchLogger(BaseLogger):
         return current_rank
-logger = PyTorchLogger()
+logger = PyTorchLogger()

msprobe/pytorch/common/parse_json.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import json
 from msprobe.core.common.exceptions import ParseJsonException
@@ -5,14 +20,6 @@ from msprobe.core.common.file_utils import FileOpen
 def parse_json_info_forward_backward(json_path):
-    def parse_data_name_with_pattern(data_name, pattern):
-        name_struct = data_name.split('.')
-        if not name_struct[-1] == pattern:
-            raise ParseJsonException(ParseJsonException.UnexpectedNameStruct,
-                f"{data_name} in file {json_path}")
-        api_name = '.'.join(name_struct[:-1])
-        return api_name
     with FileOpen(json_path, 'r') as f:
         dump_json = json.load(f)
@@ -27,13 +34,21 @@ def parse_json_info_forward_backward(json_path):
         if "Module" in data_name:
             continue
         if "forward" in data_name:
-            api_name = parse_data_name_with_pattern(data_name, "forward")
+            api_name = parse_data_name_with_pattern(data_name, "forward", json_path)
             forward_data.update({api_name: data_item})
         elif "backward" in data_name:
-            api_name = parse_data_name_with_pattern(data_name, "backward")
+            api_name = parse_data_name_with_pattern(data_name, "backward", json_path)
             backward_data.update({api_name: data_item})
         else:
             raise ParseJsonException(ParseJsonException.UnexpectedNameStruct,
-                f"{data_name} in file {json_path}.")
+                                     f"{data_name} in file {json_path}.")
     return forward_data, backward_data, real_data_path
+def parse_data_name_with_pattern(data_name, pattern, json_path):
+    name_struct = data_name.split('.')
+    if not name_struct[-1] == pattern:
+        raise ParseJsonException(ParseJsonException.UnexpectedNameStruct, f"{data_name} in file {json_path}")
+    api_name = '.'.join(name_struct[:-1])
+    return api_name

msprobe/pytorch/common/utils.py CHANGED Viewed

@@ -1,8 +1,7 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-# Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -13,20 +12,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
 import io
 import os
 import random
 import stat
+from functools import wraps
+import numpy as np
 import torch
 import torch.distributed as dist
-import numpy as np
-from functools import wraps
 from msprobe.core.common.exceptions import DistributedNotInitializedError
-from msprobe.core.common.log import logger
 from msprobe.core.common.file_utils import (FileCheckConst, change_mode,
                                             check_file_or_directory_path, check_path_before_create)
+from msprobe.core.common.log import logger
+from msprobe.core.common.utils import check_seed_all
+from packaging import version
 try:
     import torch_npu
@@ -35,10 +36,8 @@ except ImportError:
 else:
     is_gpu = False
 torch_without_guard_version = torch.__version__ >= '2.1'
 if not is_gpu and not torch_without_guard_version:
     from torch_npu.utils.device_guard import torch_device_guard as torch_npu_device_guard
@@ -46,7 +45,6 @@ npu_distributed_api = ['isend', 'irecv']
 def parameter_adapter(func):
     def handle_masked_select(input_tensor, indices):
         masked_select_func = getattr(torch._C._VariableFunctionsClass, "masked_select")
         if input_tensor.dtype == torch.bfloat16:
@@ -80,17 +78,19 @@ def parameter_adapter(func):
         if self.op_name_ == "__eq__" and args[1] is None:
             return False
         return func(self, *args, **kwargs)
     return inner
 def torch_device_guard(func):
     if is_gpu or torch_without_guard_version:
         return func
-    # Parse args/kwargs matched torch.device objects
+    # Parse args/kwargs matched torch.device objects
     @torch_npu_device_guard
     def wrapper(*args, **kwargs):
         return func(*args, **kwargs)
     return wrapper
@@ -105,20 +105,28 @@ def get_rank_if_initialized():
 def seed_all(seed=1234, mode=False):
-    random.seed(seed)
-    os.environ['PYTHONHASHSEED'] = str(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.use_deterministic_algorithms(mode)
-    if is_gpu:
-        torch.cuda.manual_seed_all(seed)
-        torch.cuda.manual_seed(seed)
-        torch.backends.cudnn.deterministic = True
-        torch.backends.cudnn.enable = False
-        torch.backends.cudnn.benchmark = False
-    else:
-        torch_npu.npu.manual_seed_all(seed)
-        torch_npu.npu.manual_seed(seed)
+    check_seed_all(seed, mode)
+    try:
+        random.seed(seed)
+        os.environ['PYTHONHASHSEED'] = str(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+        cuda_version = torch.version.cuda
+        if cuda_version is not None and version.parse(cuda_version) >= version.parse("10.2"):
+            os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
+        os.environ['HCCL_DETERMINISTIC'] = str(mode)
+        torch.use_deterministic_algorithms(mode)
+        if is_gpu:
+            torch.cuda.manual_seed_all(seed)
+            torch.cuda.manual_seed(seed)
+            torch.backends.cudnn.deterministic = True
+            torch.backends.cudnn.enable = False
+            torch.backends.cudnn.benchmark = False
+        else:
+            torch_npu.npu.manual_seed_all(seed)
+            torch_npu.npu.manual_seed(seed)
+    except Exception as e:
+        logger.error(f"There is an unexpected error while determinating randomness. {e}")
 class Const:
@@ -191,10 +199,7 @@ class Const:
     ENV_ENABLE = "1"
     ENV_DISABLE = "0"
-    MAX_SEED_VALUE = 2**32 - 1
-    INPLACE_LIST = ["broadcast", "all_reduce", "reduce", "all_gather", "gather", "scatter", "reduce_scatter",
-                    "_reduce_scatter_base", "_all_gather_base", "all_to_all_single"]
+    MAX_SEED_VALUE = 2 ** 32 - 1
     TASK_LIST = ["tensor", "statistics", "overflow_check", "free_benchmark"]
     LEVEL_LIST = ["L0", "L1", "L2", "mix"]
@@ -257,7 +262,7 @@ def print_rank_0(message):
             logger.info(message)
     else:
         logger.info(message)
 def load_pt(pt_path, to_cpu=False):
     pt_path = os.path.realpath(pt_path)
@@ -279,8 +284,8 @@ def save_pt(tensor, filepath):
         torch.save(tensor, filepath)
     except Exception as e:
         logger.error("Save pt file failed, please check according possible error causes: "
-                            "1. out of disk space or disk error, "
-                            "2. no permission to write files, etc.")
+                     "1. out of disk space or disk error, "
+                     "2. no permission to write files, etc.")
         raise RuntimeError(f"save pt file {filepath} failed") from e
     change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY)

msprobe/pytorch/compare/distributed_compare.py CHANGED Viewed

@@ -1,8 +1,7 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-# Copyright (C) 2019-2024. Huawei Technologies Co., Ltd. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
+# Copyright (c) 2019-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -13,14 +12,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
 import os
 from msprobe.core.common.utils import CompareException, check_compare_param, \
     check_configuration_param, task_dumppath_get
 from msprobe.core.common.file_utils import create_directory
 from msprobe.core.common.exceptions import FileCheckException
 from msprobe.pytorch.common.log import logger
-from msprobe.core.common.const import Const
 from msprobe.pytorch.compare.pt_compare import PTComparator
 from msprobe.core.compare.utils import check_and_return_dir_contents, extract_json
@@ -55,12 +53,14 @@ def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs):
         }
         try:
             summary_compare, md5_compare = task_dumppath_get(dump_result_param)
-            check_configuration_param(stack_mode, auto_analyze, fuzzy_match)
+            check_configuration_param(stack_mode, auto_analyze, fuzzy_match,
+                                      dump_result_param.get('is_print_compare_log', True))
             create_directory(output_path)
-            check_compare_param(dump_result_param, output_path, summary_compare=summary_compare, md5_compare=md5_compare)
+            check_compare_param(dump_result_param, output_path,
+                                summary_compare=summary_compare, md5_compare=md5_compare)
         except (CompareException, FileCheckException) as error:
             logger.error('Compare failed. Please check the arguments and do it again!')
             raise CompareException(error.code) from error
         pt_comparator = PTComparator()
-        pt_comparator.compare_core(dump_result_param, output_path, suffix=f'_{nr}-{br}', summary_compare=summary_compare,
-                     md5_compare=md5_compare, **kwargs)
+        pt_comparator.compare_core(dump_result_param, output_path, suffix=f'_{nr}-{br}',
+                                   summary_compare=summary_compare, md5_compare=md5_compare, **kwargs)

msprobe/pytorch/compare/match.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import os
 from msprobe.core.common.utils import CompareException
 from msprobe.core.common.file_utils import load_yaml

msprobe/pytorch/compare/pt_compare.py CHANGED Viewed

@@ -1,17 +1,48 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import os.path
 import torch
 from msprobe.core.common.const import FileCheckConst
 from msprobe.pytorch.common.log import logger
 from msprobe.core.common.exceptions import FileCheckException
 from msprobe.core.compare.acc_compare import Comparator
-from msprobe.core.common.utils import check_configuration_param, task_dumppath_get, check_compare_param, CompareException
-from msprobe.core.common.file_utils import FileChecker, create_directory
+from msprobe.core.common.utils import check_configuration_param, task_dumppath_get, check_compare_param, \
+    CompareException
+from msprobe.core.common.file_utils import FileChecker, create_directory, load_yaml
 from msprobe.pytorch.common.utils import load_pt
 class PTComparator (Comparator):
-    def __init__(self):
+    def __init__(self, data_mapping=None):
         self.frame_name = PTComparator.__name__
+        self.data_mapping = data_mapping
+        if isinstance(self.data_mapping, str) or self.data_mapping is None:
+            self.data_mapping_dict = self.load_mapping_file(self.data_mapping)
+        elif isinstance(self.data_mapping, dict):
+            self.data_mapping_dict = self.data_mapping
+        else:
+            raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got "
+                            f"{type(self.data_mapping)}")
+    def load_mapping_file(self, mapping_file):
+        if isinstance(mapping_file, str):
+            mapping_dict = load_yaml(mapping_file)
+        else:
+            mapping_dict = {}
+        return mapping_dict
     def read_npy_data(self, dir_path, file_name):
         data_path = os.path.join(dir_path, file_name)
@@ -35,16 +66,17 @@ class PTComparator (Comparator):
         return data_value
-def compare(input_param, output_path, stack_mode=False, auto_analyze=True, fuzzy_match=False):
+def compare(input_param, output_path, stack_mode=False, auto_analyze=True, fuzzy_match=False, **kwargs):
     try:
         summary_compare, md5_compare = task_dumppath_get(input_param)
-        check_configuration_param(stack_mode, auto_analyze, fuzzy_match)
+        check_configuration_param(stack_mode, auto_analyze, fuzzy_match, input_param.get('is_print_compare_log', True))
         create_directory(output_path)
         check_compare_param(input_param, output_path, summary_compare, md5_compare)
+        data_mapping = kwargs.get('data_mapping', None)
     except (CompareException, FileCheckException) as error:
         logger.error('Compare failed. Please check the arguments and do it again!')
         raise CompareException(error.code) from error
-    pt_comparator = PTComparator()
+    pt_comparator = PTComparator(data_mapping)
     pt_comparator.compare_core(input_param, output_path, stack_mode=stack_mode,
                  auto_analyze=auto_analyze, fuzzy_match=fuzzy_match, summary_compare=summary_compare,
                  md5_compare=md5_compare)

msprobe/pytorch/debugger/debugger_config.py CHANGED Viewed

@@ -1,6 +1,23 @@
-from msprobe.pytorch.common import seed_all
-from msprobe.pytorch.common.log import logger
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import MsprobeException
+from msprobe.pytorch.common.log import logger
 class DebuggerConfig:
@@ -10,8 +27,6 @@ class DebuggerConfig:
         self.rank = common_config.rank if common_config.rank else []
         self.step = common_config.step if common_config.step else []
         self.level = level or common_config.level or "L1"
-        self.seed = common_config.seed if common_config.seed else 1234
-        self.is_deterministic = common_config.is_deterministic
         self.enable_dataloader = common_config.enable_dataloader
         self.scope = task_config.scope if task_config.scope else []
         self.list = task_config.list if task_config.list else []
@@ -25,15 +40,15 @@ class DebuggerConfig:
         self.framework = Const.PT_FRAMEWORK
         if self.task == Const.FREE_BENCHMARK:
-            self.fuzz_device = task_config.fuzz_device if task_config.fuzz_device else 'npu'
-            self.handler_type = task_config.handler_type if task_config.handler_type else 'check'
-            self.pert_mode = task_config.pert_mode if task_config.pert_mode else 'improve_precision'
-            self.fuzz_level = task_config.fuzz_level if task_config.fuzz_level else 'L1'
-            self.fuzz_stage = task_config.fuzz_stage if task_config.fuzz_stage else 'forward'
+            self.fuzz_device = task_config.fuzz_device
+            self.handler_type = task_config.handler_type
+            self.pert_mode = task_config.pert_mode
+            self.fuzz_level = task_config.fuzz_level
+            self.fuzz_stage = task_config.fuzz_stage
             self.preheat_config = {
-                "if_preheat": task_config.if_preheat if task_config.if_preheat is not None else True,
-                "preheat_step": task_config.preheat_step if task_config.preheat_step else 15,
-                "max_sample": task_config.max_sample if task_config.max_sample else 20,
+                "if_preheat": task_config.if_preheat,
+                "preheat_step": task_config.preheat_step,
+                "max_sample": task_config.max_sample
             }
         self.online_run_ut = False
@@ -46,8 +61,7 @@ class DebuggerConfig:
             self.port = task_config.port if task_config.port else -1
         self.check()
-        if self.step:
-            self.step.sort()
         if self.level == "L2":
             if not self.scope or not isinstance(self.scope, list) or len(self.scope) != 1:
                 raise ValueError("scope must be configured as a list with one api name")
@@ -58,38 +72,37 @@ class DebuggerConfig:
                 for index, scope_spec in enumerate(self.scope):
                     self.scope[index] = scope_spec.replace(Const.BACKWARD, Const.FORWARD)
                     self.backward_input[self.scope[index]] = self.backward_input_list[index]
-        seed_all(self.seed, self.is_deterministic)
     def check_kwargs(self):
         if self.task and self.task not in Const.TASK_LIST:
-            raise Exception("task is invalid")
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The task <{self.task}> is not in the {Const.TASK_LIST}.")
         if self.level and self.level not in Const.LEVEL_LIST:
-            raise Exception("level is invalid")
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The level <{self.level}> is not in the {Const.LEVEL_LIST}.")
         if not self.dump_path:
-            raise Exception("Invalid dump path, please check your config")
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The dump_path not found.")
     def check(self):
         self.check_kwargs()
-        self._check_rank()
-        self._check_step()
         return True
-    def check_model(self, model):
-        if self.level in ["L0", "mix"] and not model:
-            raise Exception(
-                f"For level {self.level}, PrecisionDebugger must receive a model argument."
-            )
-    def _check_rank(self):
-        if self.rank:
-            for rank_id in self.rank:
-                if not isinstance(rank_id, int) or rank_id < 0:
-                    raise ValueError(f"rank {self.rank} must be an integer and greater than or equal to 0.")
-            else:
-                logger.warning_on_rank_0(f"Rank argument is provided. Only rank {self.rank} data will be dumpped.")
-    def _check_step(self):
-        if self.step:
-            for s in self.step:
-                if not isinstance(s, int) or s < 0:
-                    raise ValueError(f"step element {s} must be an integer and greater than or equal to 0.")
+    def check_model(self, instance, start_model):
+        if self.level not in ["L0", "mix"]:
+            if instance.model is not None or start_model is not None:
+                logger.warning_on_rank_0(
+                    f"The current level is not L0 or mix level, so the model parameters will not be used.")
+            return
+        if start_model is None:
+            if instance.model is None:
+                logger.error_on_rank_0(
+                    f"For level {self.level}, PrecisionDebugger or start interface must receive a 'model' argument.")
+                raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, f"missing the parameter 'model'")
+            return
+        if isinstance(start_model, torch.nn.Module):
+            instance.model = start_model
+        else:
+            logger.error_on_rank_0(f"The 'model' parameter of start must be a torch.nn.Module type.")
+            raise MsprobeException(
+                MsprobeException.INVALID_PARAM_ERROR, f"model must be a torch.nn.Module")

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl