PyPI - mindstudio-probe - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194) hide show

{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +1 -1
mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
msprobe/README.md +46 -16
msprobe/__init__.py +16 -1
msprobe/config.json +0 -2
msprobe/core/advisor/advisor.py +8 -8
msprobe/core/advisor/advisor_const.py +6 -7
msprobe/core/advisor/advisor_result.py +12 -12
msprobe/core/common/const.py +64 -3
msprobe/core/common/exceptions.py +2 -2
msprobe/core/common/file_utils.py +54 -9
msprobe/core/common/inplace_op_checker.py +38 -0
msprobe/core/common/inplace_ops.yaml +251 -0
msprobe/core/common/log.py +21 -11
msprobe/core/common/utils.py +153 -167
msprobe/core/common_config.py +18 -25
msprobe/core/compare/acc_compare.py +209 -36
msprobe/core/compare/check.py +102 -17
msprobe/core/compare/compare_cli.py +21 -1
msprobe/core/compare/highlight.py +41 -5
msprobe/core/compare/multiprocessing_compute.py +33 -8
msprobe/core/compare/npy_compare.py +21 -6
msprobe/core/compare/utils.py +82 -48
msprobe/core/data_dump/data_collector.py +31 -32
msprobe/core/data_dump/data_processor/base.py +45 -22
msprobe/core/data_dump/data_processor/factory.py +20 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +11 -5
msprobe/core/data_dump/data_processor/pytorch_processor.py +24 -7
msprobe/core/data_dump/json_writer.py +63 -42
msprobe/core/data_dump/scope.py +32 -16
msprobe/core/grad_probe/constant.py +4 -0
msprobe/core/grad_probe/grad_compare.py +2 -3
msprobe/core/grad_probe/utils.py +16 -3
msprobe/docs/01.installation.md +19 -9
msprobe/docs/02.config_introduction.md +52 -80
msprobe/docs/03.config_examples.md +3 -13
msprobe/docs/04.acl_config_examples.md +11 -9
msprobe/docs/05.data_dump_PyTorch.md +140 -12
msprobe/docs/06.data_dump_MindSpore.md +47 -5
msprobe/docs/07.accuracy_checker_PyTorch.md +57 -34
msprobe/docs/08.accuracy_checker_online_PyTorch.md +51 -11
msprobe/docs/09.accuracy_checker_MindSpore.md +8 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +181 -99
msprobe/docs/11.accuracy_compare_MindSpore.md +162 -31
msprobe/docs/13.overflow_check_MindSpore.md +1 -1
msprobe/docs/15.free_benchmarking_PyTorch.md +59 -53
msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
msprobe/docs/17.grad_probe.md +14 -16
msprobe/docs/18.online_dispatch.md +89 -0
msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +22 -10
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/mindspore/__init__.py +1 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +35 -11
msprobe/mindspore/api_accuracy_checker/api_info.py +7 -0
msprobe/mindspore/cell_processor.py +27 -3
msprobe/mindspore/common/const.py +2 -0
msprobe/mindspore/common/utils.py +18 -2
msprobe/mindspore/compare/distributed_compare.py +9 -22
msprobe/mindspore/compare/layer_mapping.py +146 -0
msprobe/mindspore/compare/modify_mapping.py +107 -0
msprobe/mindspore/compare/ms_compare.py +173 -35
msprobe/mindspore/compare/ms_graph_compare.py +27 -11
msprobe/mindspore/debugger/debugger_config.py +16 -13
msprobe/mindspore/debugger/precision_debugger.py +37 -13
msprobe/mindspore/dump/dump_tool_factory.py +16 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +11 -1
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +82 -10
msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
msprobe/mindspore/dump/jit_dump.py +41 -17
msprobe/mindspore/dump/kernel_graph_dump.py +19 -3
msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -4
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +19 -4
msprobe/mindspore/free_benchmark/common/config.py +15 -0
msprobe/mindspore/free_benchmark/common/handler_params.py +15 -0
msprobe/mindspore/free_benchmark/common/utils.py +19 -5
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +16 -2
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +18 -3
msprobe/mindspore/free_benchmark/handler/base_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/fix_handler.py +15 -0
msprobe/mindspore/free_benchmark/handler/handler_factory.py +18 -3
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +22 -7
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -0
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +22 -7
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +44 -18
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +18 -4
msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +20 -5
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +15 -0
msprobe/mindspore/grad_probe/global_context.py +18 -8
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -4
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
msprobe/mindspore/service.py +42 -123
msprobe/pytorch/__init__.py +20 -1
msprobe/pytorch/api_accuracy_checker/common/config.py +19 -2
msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +47 -21
msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +67 -32
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +26 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +19 -2
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +51 -125
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +146 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +21 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +78 -33
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +36 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
msprobe/pytorch/bench_functions/__init__.py +18 -3
msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
msprobe/pytorch/bench_functions/confusion_transpose.py +15 -0
msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
msprobe/pytorch/bench_functions/linear.py +15 -0
msprobe/pytorch/bench_functions/matmul_backward.py +21 -6
msprobe/pytorch/bench_functions/npu_fusion_attention.py +180 -151
msprobe/pytorch/bench_functions/rms_norm.py +15 -0
msprobe/pytorch/bench_functions/rotary_mul.py +28 -9
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
msprobe/pytorch/bench_functions/swiglu.py +20 -5
msprobe/pytorch/common/__init__.py +15 -0
msprobe/pytorch/common/log.py +18 -6
msprobe/pytorch/common/parse_json.py +26 -11
msprobe/pytorch/common/utils.py +40 -35
msprobe/pytorch/compare/distributed_compare.py +11 -11
msprobe/pytorch/compare/match.py +15 -0
msprobe/pytorch/compare/pt_compare.py +38 -6
msprobe/pytorch/debugger/debugger_config.py +52 -39
msprobe/pytorch/debugger/precision_debugger.py +72 -24
msprobe/pytorch/free_benchmark/__init__.py +20 -5
msprobe/pytorch/free_benchmark/common/enums.py +28 -0
msprobe/pytorch/free_benchmark/common/params.py +15 -0
msprobe/pytorch/free_benchmark/common/utils.py +17 -1
msprobe/pytorch/free_benchmark/compare/grad_saver.py +28 -7
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +15 -0
msprobe/pytorch/free_benchmark/main.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +26 -2
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +55 -16
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
msprobe/pytorch/function_factory.py +17 -2
msprobe/pytorch/functional/module_dump.py +84 -0
msprobe/pytorch/grad_probe/grad_stat_csv.py +2 -2
msprobe/pytorch/hook_module/__init__.py +16 -1
msprobe/pytorch/hook_module/api_registry.py +13 -8
msprobe/pytorch/hook_module/hook_module.py +17 -19
msprobe/pytorch/hook_module/utils.py +4 -6
msprobe/pytorch/hook_module/wrap_aten.py +12 -11
msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
msprobe/pytorch/hook_module/wrap_functional.py +10 -11
msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
msprobe/pytorch/hook_module/wrap_torch.py +4 -6
msprobe/pytorch/hook_module/wrap_vf.py +4 -6
msprobe/pytorch/module_processer.py +17 -2
msprobe/pytorch/online_dispatch/compare.py +11 -12
msprobe/pytorch/online_dispatch/single_compare.py +7 -7
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +8 -0
msprobe/pytorch/online_dispatch/utils.py +1 -4
msprobe/pytorch/parse.py +15 -0
msprobe/pytorch/parse_tool/cli.py +5 -6
msprobe/pytorch/parse_tool/lib/compare.py +9 -10
msprobe/pytorch/parse_tool/lib/parse_tool.py +3 -0
msprobe/pytorch/parse_tool/lib/utils.py +28 -24
msprobe/pytorch/parse_tool/lib/visualization.py +1 -1
msprobe/pytorch/pt_config.py +167 -38
msprobe/pytorch/service.py +97 -32
mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
msprobe/pytorch/functional/data_processor.py +0 -0
msprobe/pytorch/functional/dump_module.py +0 -39
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0

msprobe/mindspore/free_benchmark/perturbation/improve_precision.py CHANGED Viewed

@@ -1,13 +1,27 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Any
 import mindspore as ms
 from mindspore import Tensor, ops
-from msprobe.mindspore.free_benchmark.perturbation.base_perturbation import BasePerturbation
-from msprobe.mindspore.free_benchmark.common.handler_params import HandlerParams
-from msprobe.mindspore.common.const import FreeBenchmarkConst
-from msprobe.mindspore.common.log import logger
 from msprobe.mindspore.common.const import Const
+from msprobe.mindspore.common.log import logger
+from msprobe.mindspore.free_benchmark.common.handler_params import HandlerParams
+from msprobe.mindspore.free_benchmark.perturbation.base_perturbation import BasePerturbation
 class ImprovePrecisionPerturbation(BasePerturbation):

msprobe/mindspore/free_benchmark/perturbation/no_change.py CHANGED Viewed

@@ -1,7 +1,22 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Any
-from msprobe.mindspore.free_benchmark.perturbation.base_perturbation import BasePerturbation
 from msprobe.mindspore.free_benchmark.common.handler_params import HandlerParams
+from msprobe.mindspore.free_benchmark.perturbation.base_perturbation import BasePerturbation
 class NoChangePerturbation(BasePerturbation):

msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py CHANGED Viewed

@@ -1,10 +1,25 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from msprobe.mindspore.common.const import FreeBenchmarkConst
 from msprobe.mindspore.free_benchmark.common.config import Config
-from .add_noise import AddNoisePerturbation
-from .bit_noise import BitNoisePerturbation
-from .no_change import NoChangePerturbation
-from .improve_precision import ImprovePrecisionPerturbation
-from .exchange_value import ExchangeValuePerturbation
+from msprobe.mindspore.free_benchmark.perturbation.add_noise import AddNoisePerturbation
+from msprobe.mindspore.free_benchmark.perturbation.bit_noise import BitNoisePerturbation
+from msprobe.mindspore.free_benchmark.perturbation.exchange_value import ExchangeValuePerturbation
+from msprobe.mindspore.free_benchmark.perturbation.improve_precision import ImprovePrecisionPerturbation
+from msprobe.mindspore.free_benchmark.perturbation.no_change import NoChangePerturbation
 class PerturbationFactory:

msprobe/mindspore/free_benchmark/self_check_tool_factory.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from msprobe.mindspore.common.const import Const
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
 from msprobe.mindspore.free_benchmark.api_pynative_self_check import ApiPyNativeSelFCheck

msprobe/mindspore/grad_probe/global_context.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import os
 import threading
-from typing import Dict, Union
+from typing import Dict, Union, Tuple
-from msprobe.core.grad_probe.utils import check_str
+from msprobe.core.grad_probe.utils import check_str, check_bounds_element
 from msprobe.core.grad_probe.constant import GradConst
 from msprobe.mindspore.common.log import logger
 from msprobe.core.common.file_utils import create_directory, check_path_before_create
@@ -18,7 +18,7 @@ class GlobalContext:
         GradConst.STEP: None,
         GradConst.RANK: None,
         GradConst.CURRENT_STEP: 0,
-        GradConst.BOUNDS: [-10, -1, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 1, 10],
+        GradConst.BOUNDS: [-1, 0, 1],
         GradConst.OUTPUT_PATH: None
     }
@@ -31,19 +31,19 @@ class GlobalContext:
     def init_context(self, config_dict: Dict):
         level = config_dict.get(GradConst.LEVEL)
-        check_str(level, variable_name = "level in yaml")
+        check_str(level, variable_name="level in yaml")
         if level in GradConst.SUPPORTED_LEVEL:
             self._setting[GradConst.LEVEL] = config_dict.get(GradConst.LEVEL)
         else:
             raise ValueError("Invalid level set in config yaml file, level option: L0, L1, L2")
         self._set_input_list(config_dict, GradConst.PARAM_LIST, str)
-        self._set_input_list(config_dict, GradConst.BOUNDS, float)
+        self._set_input_list(config_dict, GradConst.BOUNDS, (float, int), element_check=check_bounds_element)
         self._set_input_list(config_dict, GradConst.STEP, int)
         self._set_input_list(config_dict, GradConst.RANK, int)
         output_path = config_dict.get(GradConst.OUTPUT_PATH)
-        check_str(output_path, variable_name = "output_path in yaml")
+        check_str(output_path, variable_name="output_path in yaml")
         try:
             check_path_before_create(output_path)
         except RuntimeError as err:
@@ -70,19 +70,29 @@ class GlobalContext:
         dump_rank_list = self.get_context(GradConst.RANK)
         return (not dump_rank_list) or (rank in dump_rank_list)
-    def _set_input_list(self, config_dict: Dict, name: str, dtype: Union[int, str, float]):
-        value = config_dict.get(name)
+    def _get_type_str(self, dtype: Union[int, str, float, Tuple[int, str, float]]):
+        if isinstance(dtype, tuple):
+            return "/".join([self._get_type_str(element) for element in dtype])
         if dtype == int:
             type_str = "integer"
         elif dtype == float:
             type_str = "float"
         else:
             type_str = "string"
+        return type_str
+    def _set_input_list(self, config_dict: Dict, name: str,
+                        dtype: Union[int, str, float, Tuple[int, str, float]], element_check=None):
+        value = config_dict.get(name)
+        type_str = self._get_type_str(dtype)
         if value and isinstance(value, list):
             for val in value:
                 if not isinstance(val, dtype):
                     logger.warning(f"Invalid {name} which must be None or list of {type_str}")
                     return
+                if element_check and not element_check(val):
+                    logger.warning(f"Given {name} violates some rules.")
+                    return
             self._setting[name] = value
         else:
             logger.warning(f"{name} is None or not a list with valid items, use default value.")

msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py CHANGED Viewed

@@ -1,8 +1,24 @@
-import os
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import json
-from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
-from msprobe.mindspore.common.log import logger
+import os
 from msprobe.core.common.file_utils import FileOpen, create_directory
+from msprobe.mindspore.common.log import logger
+from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
 class KernelGraphOverflowCheck:
@@ -16,7 +32,7 @@ class KernelGraphOverflowCheck:
         self.dump_json["common_dump_settings"]["saved_data"] = "full"
         self.dump_json["common_dump_settings"]["input_output"] = 0
         self.dump_json["common_dump_settings"]["kernels"] = []
-        self.dump_json["common_dump_settings"]["support_device"] = [0,1,2,3,4,5,6,7]
+        self.dump_json["common_dump_settings"]["support_device"] = [0, 1, 2, 3, 4, 5, 6, 7]
         self.dump_json["common_dump_settings"]["op_debug_mode"] = 3
         self.dump_json["common_dump_settings"]["file_format"] = "npy"

msprobe/mindspore/overflow_check/overflow_check_tool_factory.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from msprobe.mindspore.common.const import Const
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
 from msprobe.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck

msprobe/mindspore/service.py CHANGED Viewed

@@ -34,9 +34,10 @@ from msprobe.core.data_dump.scope import BaseScope
 from msprobe.mindspore.common.utils import get_rank_if_initialized
 from msprobe.core.common.file_utils import create_directory
 from msprobe.mindspore.common.log import logger
-from msprobe.core.common.utils import Const
+from msprobe.core.common.utils import Const, print_tools_ends_info
 from msprobe.core.common.exceptions import DistributedNotInitializedError
 from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+from msprobe.mindspore.dump.hook_cell.primitive_hooks import PrimitiveHookService
 from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs, \
     ModuleBackwardInputs, ModuleBackwardOutputs
 from msprobe.core.common.exceptions import MsprobeException
@@ -52,11 +53,12 @@ class Service:
         self.config.level = self.config.level_ori
         self.data_collector = build_data_collector(self.config)
         self.cell_processor = CellProcessor(self.data_collector.scope)
+        self.primitive_hook_service = PrimitiveHookService(self)
         self.switch = False
+        self.primitive_switch = False
         self.current_iter = 0
         self.first_start = True
         self.current_rank = None
-        self.primitive_counters = {}
         self.dump_iter_dir = None
         self.start_call = False
         self.check_level_valid()
@@ -71,7 +73,7 @@ class Service:
         )
     def check_level_valid(self):
-        if self.config.level == "L2":
+        if self.config.level == Const.LEVEL_L2:
             raise MsprobeException(
                 MsprobeException.INVALID_PARAM_ERROR, "L2 level dump function is currently not supported."
             )
@@ -122,113 +124,6 @@ class Service:
         return wrap_forward_hook, wrap_backward_hook
-    def wrap_primitive(self, origin_func, primitive_name):
-        service_instance = self
-        def create_backward_hook(captured_grads, num_tensors, updated_primitive_name, hook_type):
-            def backward_hook(grad):
-                captured_grads.append(grad)
-                backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}"
-                try:
-                    if len(captured_grads) == num_tensors and hook_type == Const.INPUT:
-                        service_instance.data_collector.update_api_or_module_name(backward_primitive_name)
-                        new_module_input_output = ModuleBackwardOutputs(grad_output=tuple(captured_grads))
-                        service_instance.data_collector.backward_output_data_collect(
-                            backward_primitive_name, service_instance, os.getpid(), new_module_input_output
-                        )
-                        captured_grads.clear()
-                    elif len(captured_grads) == num_tensors and hook_type == Const.OUTPUT:
-                        service_instance.data_collector.update_api_or_module_name(backward_primitive_name)
-                        new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads))
-                        service_instance.data_collector.backward_input_data_collect(
-                            backward_primitive_name, service_instance, os.getpid(), new_module_input_output
-                        )
-                        captured_grads.clear()
-                except Exception as exception:
-                    raise Exception(f"This is a primitive op {hook_type}_backward dump error: {exception},"
-                                    f" updated_primitive_name: {updated_primitive_name}") from exception
-            return backward_hook
-        def hook_primitive_inputs(args, captured_grads_input, updated_primitive_name):
-            hooked_inputs = []
-            num_tensors = sum(isinstance(arg, Tensor) for arg in args)
-            input_backward_hook = create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name,
-                                                       Const.INPUT)
-            for _, arg in enumerate(args):
-                if isinstance(arg, Tensor):
-                    arg_hooked = ops.HookBackward(input_backward_hook)(arg)
-                    hooked_inputs.append(arg_hooked)
-                else:
-                    hooked_inputs.append(arg)
-            return hooked_inputs
-        def hook_primitive_outputs(out, captured_grads_output, updated_primitive_name):
-            if isinstance(out, tuple):
-                num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out)
-            else:
-                num_output_tensors = 1
-            output_backward_hook = create_backward_hook(captured_grads_output, num_output_tensors,
-                                                        updated_primitive_name, Const.OUTPUT)
-            if isinstance(out, Tensor):
-                return ops.HookBackward(output_backward_hook)(out)
-            elif isinstance(out, tuple):
-                hooked_outputs = []
-                for tensor in out:
-                    if isinstance(tensor, Tensor):
-                        hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor))
-                    else:
-                        hooked_outputs.append(tensor)
-                return tuple(hooked_outputs)
-            return out
-        def wrapped_primitive_call(instance_self, *args, **kwargs):
-            service_instance.update_primitive_counters(primitive_name)
-            current_count = service_instance.primitive_counters.get(primitive_name, 0)
-            updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}"
-            if not service_instance.switch:
-                return origin_func(*args, **kwargs)
-            captured_grads_input, captured_grads_output = [], []
-            try:
-                hooked_inputs = hook_primitive_inputs(args, captured_grads_input, updated_primitive_name)
-            except Exception as exception:
-                raise Exception("This is a primitive op dump error during input hooking: {},"
-                                " primitive_name: {}".format(exception, primitive_name)) from exception
-            try:
-                out = origin_func(*hooked_inputs, **kwargs)
-            except Exception as exception:
-                raise Exception("This is a primitive op dump error during function call: {},"
-                                " primitive_name: {}".format(exception, primitive_name)) from exception
-            forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}"
-            service_instance.data_collector.update_api_or_module_name(forward_primitive_name)
-            if service_instance.data_collector:
-                module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out)
-                try:
-                    service_instance.data_collector.forward_data_collect(forward_primitive_name, instance_self,
-                                                                         os.getpid(), module_input_output)
-                except Exception as exception:
-                    raise Exception("This is a primitive op dump error during forward data collection: {},"
-                                    " primitive_name: {}".format(exception, primitive_name)) from exception
-                if service_instance.data_collector.if_return_forward_new_output():
-                    out = service_instance.data_collector.get_forward_new_output()
-            try:
-                out = hook_primitive_outputs(out, captured_grads_output, updated_primitive_name)
-            except Exception as exception:
-                raise Exception("This is a primitive op dump error during output hooking: {},"
-                                " primitive_name: {}".format(exception, primitive_name)) from exception
-            return out
-        return wrapped_primitive_call
     def update_primitive_counters(self, primitive_name):
         if primitive_name not in self.primitive_counters:
@@ -236,7 +131,7 @@ class Service:
         else:
             self.primitive_counters[primitive_name] += 1
-    def register_hooks(self):
+    def register_primitive_hooks(self):
         primitive_set = set()
         for _, cell in self.model.cells_and_names():
             for pname, primitive in cell._primitives.items():
@@ -244,15 +139,17 @@ class Service:
         for pname, primitive in primitive_set:
             NewPrimitive = type('NewPrimitive', (primitive.__class__,),
-                                {'__call__': self.wrap_primitive(primitive.__call__, pname)})
+                                {'__call__': self.primitive_hook_service.wrap_primitive(primitive.__call__, pname)})
             primitive.__class__ = NewPrimitive
     def step(self):
         self.current_iter += 1
         self.data_collector.update_iter(self.current_iter)
         HOOKCell.cell_count = defaultdict(int)
-        CellProcessor.cell_count = {}
-        self.primitive_counters.clear()
+        CellProcessor.reset_cell_stats()
+        self.primitive_hook_service.primitive_counters.clear()
+        self.data_collector.data_writer.reset_cache()
+        JitDump.jit_count = defaultdict(int)
     def start(self, model=None):
         self.start_call = True
@@ -262,9 +159,8 @@ class Service:
             api_register.api_set_ori_func()
             self.should_stop_service = True
             self.switch = False
-            logger.info("************************************************")
-            logger.info(f"*          {Const.TOOL_NAME} ends successfully.          *")
-            logger.info("************************************************")
+            self.primitive_switch = False
+            print_tools_ends_info()
             return
         if self.config.step and self.current_iter not in self.config.step:
             return
@@ -281,7 +177,7 @@ class Service:
             if self.config.rank and self.current_rank not in self.config.rank:
                 return
             self.register_hook_new()
-            if self.config.level == "L1":
+            if self.config.level in [Const.LEVEL_MIX, Const.LEVEL_L1]:
                 JitDump.set_config(self.config)
                 JitDump.set_data_collector(self.data_collector)
                 ms.common.api._MindsporeFunctionExecutor = JitDump
@@ -291,10 +187,31 @@ class Service:
                     PIJitCaptureContext.__exit__ = self.empty
             self.first_start = False
+        api_register.api_set_hook_func()
         self.switch = True
+        self.primitive_switch = True
         logger.info(f"Dump switch is turned on at step {self.current_iter}. ")
         self.create_dirs()
         logger.info(f"Dump data will be saved in {self.dump_iter_dir}.")
+        JitDump.jit_dump_switch = True
+    def forward_backward_dump_end(self):
+        if self.should_stop_service:
+            return
+        logger.info(f"{Const.TOOL_NAME}: debugger.forward_backward_dump_end() is set successfully. ")
+        if not self.start_call:
+            logger.error(f"{Const.TOOL_NAME}: debugger.start() is not set in the current scope.")
+            raise Exception("debugger.start() is not set in the current scope.")
+        if not self.switch:
+            logger.error(f"{Const.TOOL_NAME}: debugger.forward_backward_dump_end() should be called between "
+                         "debugger.start() and debugger.stop() ")
+            raise Exception("debugger.stop() is already called. ")
+        if self.config.step and self.current_iter not in self.config.step:
+            return
+        if self.config.rank and self.current_rank not in self.config.rank:
+            return
+        self.primitive_switch = False
+        api_register.api_set_ori_func()
     def stop(self):
         if self.should_stop_service:
@@ -309,8 +226,10 @@ class Service:
         if self.config.rank and self.current_rank not in self.config.rank:
             return
         self.switch = False
+        self.primitive_switch = False
         self.start_call = False
         self.data_collector.write_json()
+        JitDump.jit_dump_switch = False
     def need_end_service(self):
         if self.config.step and self.current_iter > max(self.config.step):
@@ -349,16 +268,16 @@ class Service:
     def register_hook_new(self):
         logger.info("The {} hook function is successfully mounted to the model.".format(self.config.task))
-        if self.config.level == "L1":
+        if self.config.level in [Const.LEVEL_MIX, Const.LEVEL_L1]:
             api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API))
             api_register.api_set_hook_func()
-            if self.model:
-                self.register_hooks()
+            if self.model and self.config.task in Const.DUMP_DATA_COLLECTION_LIST:
+                self.register_primitive_hooks()
-        if self.config.level == "L0":
+        if self.config.level in [Const.LEVEL_MIX, Const.LEVEL_L0]:
             if not self.model:
                 raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
-                                       "The current level is L0, the model cannot be None")
+                                       f"The current level is {self.config.level}, the model cannot be None")
             for name, cell in self.model.cells_and_names():
                 if cell == self.model:
                     continue

msprobe/pytorch/__init__.py CHANGED Viewed

@@ -1,4 +1,23 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from .debugger.precision_debugger import PrecisionDebugger
 from .common.utils import seed_all
 from .compare.distributed_compare import compare_distributed
-from .compare.pt_compare import compare
+from .compare.pt_compare import compare
+from .functional.module_dump import module_dump, module_dump_end

msprobe/pytorch/api_accuracy_checker/common/config.py CHANGED Viewed

@@ -1,3 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import os
 from msprobe.core.common.file_utils import load_yaml, check_file_or_directory_path
 from msprobe.pytorch.pt_config import RunUTConfig
@@ -33,8 +50,8 @@ class Config:
             raise ValueError(f"{key} must be one of {validators.keys()}")
         if not isinstance(value, validators.get(key)):
             raise ValueError(f"{key} must be {validators[key].__name__} type")
-        if key == 'precision' and value < 0:
-            raise ValueError("precision must be greater than 0")
+        if key == 'precision' and (value < 0 or value > 20):
+            raise ValueError("precision must be greater than or equal to 0 and less than 21")
         if key == 'white_list':
             RunUTConfig.check_filter_list_config(key, value)
         if key == 'black_list':

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl