PyPI - mindstudio-probe - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (278)

{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
msprobe/README.md +84 -18
msprobe/__init__.py +16 -1
msprobe/config.json +1 -5
msprobe/core/advisor/advisor.py +16 -11
msprobe/core/advisor/advisor_const.py +6 -7
msprobe/core/advisor/advisor_result.py +12 -12
msprobe/core/common/const.py +164 -3
msprobe/core/common/exceptions.py +26 -4
msprobe/core/common/file_utils.py +196 -27
msprobe/core/common/inplace_op_checker.py +53 -0
msprobe/core/common/inplace_ops.yaml +251 -0
msprobe/core/common/log.py +46 -18
msprobe/core/common/utils.py +308 -209
msprobe/core/common_config.py +60 -38
msprobe/core/compare/acc_compare.py +332 -94
msprobe/core/compare/check.py +104 -22
msprobe/core/compare/compare_cli.py +42 -5
msprobe/core/compare/highlight.py +162 -57
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
msprobe/core/compare/multiprocessing_compute.py +33 -8
msprobe/core/compare/npy_compare.py +73 -29
msprobe/core/compare/utils.py +306 -247
msprobe/core/data_dump/data_collector.py +44 -43
msprobe/core/data_dump/data_processor/base.py +88 -35
msprobe/core/data_dump/data_processor/factory.py +20 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +14 -8
msprobe/core/data_dump/data_processor/pytorch_processor.py +180 -66
msprobe/core/data_dump/json_writer.py +63 -42
msprobe/core/data_dump/scope.py +143 -48
msprobe/core/grad_probe/constant.py +31 -13
msprobe/core/grad_probe/grad_compare.py +20 -4
msprobe/core/grad_probe/utils.py +44 -3
msprobe/core/overflow_check/abnormal_scene.py +185 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +29 -9
msprobe/docs/02.config_introduction.md +83 -84
msprobe/docs/03.config_examples.md +3 -20
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +143 -13
msprobe/docs/06.data_dump_MindSpore.md +197 -88
msprobe/docs/07.accuracy_checker_PyTorch.md +69 -46
msprobe/docs/08.accuracy_checker_online_PyTorch.md +52 -17
msprobe/docs/09.accuracy_checker_MindSpore.md +51 -15
msprobe/docs/10.accuracy_compare_PyTorch.md +187 -99
msprobe/docs/11.accuracy_compare_MindSpore.md +253 -31
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +60 -55
msprobe/docs/16.free_benchmarking_MindSpore.md +159 -0
msprobe/docs/17.grad_probe.md +19 -22
msprobe/docs/18.online_dispatch.md +89 -0
msprobe/docs/19.monitor.md +468 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +386 -0
msprobe/docs/22.visualization_MindSpore.md +384 -0
msprobe/docs/23.tool_function_introduction.md +28 -0
msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +25 -10
msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/mindspore/__init__.py +16 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +130 -138
msprobe/mindspore/api_accuracy_checker/api_info.py +27 -5
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
msprobe/mindspore/api_accuracy_checker/main.py +27 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +58 -13
msprobe/mindspore/common/const.py +35 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +60 -5
msprobe/mindspore/compare/distributed_compare.py +15 -28
msprobe/mindspore/compare/ms_compare.py +319 -158
msprobe/mindspore/compare/ms_graph_compare.py +99 -49
msprobe/mindspore/debugger/debugger_config.py +20 -14
msprobe/mindspore/debugger/precision_debugger.py +43 -13
msprobe/mindspore/dump/dump_tool_factory.py +18 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +23 -3
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +203 -0
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +107 -10
msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
msprobe/mindspore/dump/jit_dump.py +56 -20
msprobe/mindspore/dump/kernel_graph_dump.py +19 -5
msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -6
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +162 -41
msprobe/mindspore/free_benchmark/common/config.py +15 -0
msprobe/mindspore/free_benchmark/common/handler_params.py +15 -1
msprobe/mindspore/free_benchmark/common/utils.py +37 -8
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +20 -5
msprobe/mindspore/free_benchmark/handler/check_handler.py +21 -7
msprobe/mindspore/free_benchmark/handler/fix_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -6
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +23 -8
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +29 -5
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +25 -10
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +45 -19
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +29 -8
msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +22 -7
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +17 -2
msprobe/mindspore/grad_probe/global_context.py +44 -14
msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +24 -10
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/ms_config.py +22 -15
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -6
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +75 -150
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +24 -7
msprobe/pytorch/__init__.py +23 -3
msprobe/pytorch/api_accuracy_checker/common/config.py +81 -2
msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +50 -25
msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +73 -33
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +44 -18
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +32 -11
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +122 -172
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +158 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +30 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +68 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +115 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +26 -9
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
msprobe/pytorch/bench_functions/__init__.py +18 -3
msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
msprobe/pytorch/bench_functions/confusion_transpose.py +20 -1
msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
msprobe/pytorch/bench_functions/linear.py +15 -0
msprobe/pytorch/bench_functions/matmul_backward.py +33 -6
msprobe/pytorch/bench_functions/npu_fusion_attention.py +280 -157
msprobe/pytorch/bench_functions/rms_norm.py +15 -0
msprobe/pytorch/bench_functions/rotary_mul.py +32 -9
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
msprobe/pytorch/bench_functions/swiglu.py +29 -6
msprobe/pytorch/common/__init__.py +15 -0
msprobe/pytorch/common/log.py +18 -6
msprobe/pytorch/common/parse_json.py +31 -16
msprobe/pytorch/common/utils.py +96 -40
msprobe/pytorch/compare/distributed_compare.py +13 -14
msprobe/pytorch/compare/match.py +15 -0
msprobe/pytorch/compare/pt_compare.py +44 -10
msprobe/pytorch/debugger/debugger_config.py +69 -52
msprobe/pytorch/debugger/precision_debugger.py +72 -24
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/free_benchmark/__init__.py +20 -5
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +43 -0
msprobe/pytorch/free_benchmark/common/params.py +23 -1
msprobe/pytorch/free_benchmark/common/utils.py +43 -5
msprobe/pytorch/free_benchmark/compare/grad_saver.py +47 -9
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +17 -0
msprobe/pytorch/free_benchmark/main.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +18 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +21 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +28 -2
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +19 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +65 -16
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +21 -5
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
msprobe/pytorch/function_factory.py +17 -2
msprobe/pytorch/functional/module_dump.py +84 -0
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/__init__.py +16 -1
msprobe/pytorch/hook_module/api_registry.py +13 -8
msprobe/pytorch/hook_module/hook_module.py +17 -19
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
msprobe/pytorch/hook_module/utils.py +4 -6
msprobe/pytorch/hook_module/wrap_aten.py +12 -11
msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
msprobe/pytorch/hook_module/wrap_functional.py +21 -20
msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
msprobe/pytorch/hook_module/wrap_torch.py +4 -6
msprobe/pytorch/hook_module/wrap_vf.py +4 -6
msprobe/pytorch/module_processer.py +18 -6
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +340 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +870 -0
msprobe/pytorch/monitor/module_metric.py +193 -0
msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
msprobe/pytorch/monitor/optimizer_collect.py +295 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
msprobe/pytorch/monitor/utils.py +250 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +38 -48
msprobe/pytorch/online_dispatch/dispatch.py +50 -25
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +60 -39
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +9 -1
msprobe/pytorch/online_dispatch/utils.py +48 -23
msprobe/pytorch/parse.py +15 -0
msprobe/pytorch/parse_tool/cli.py +5 -6
msprobe/pytorch/parse_tool/lib/compare.py +19 -26
msprobe/pytorch/parse_tool/lib/config.py +1 -1
msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -2
msprobe/pytorch/parse_tool/lib/utils.py +40 -55
msprobe/pytorch/parse_tool/lib/visualization.py +3 -1
msprobe/pytorch/pt_config.py +192 -40
msprobe/pytorch/service.py +110 -35
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +165 -0
msprobe/visualization/builder/msprobe_adapter.py +205 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +130 -0
msprobe/visualization/compare/mode_adapter.py +211 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +124 -0
msprobe/visualization/graph/graph.py +200 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +214 -0
msprobe/visualization/utils.py +232 -0
mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
msprobe/docs/04.acl_config_examples.md +0 -76
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -43
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -107
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
msprobe/pytorch/functional/dump_module.py +0 -39
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
/msprobe/pytorch/{functional/data_processor.py → monitor/distributed/__init__.py} +0 -0

msprobe/pytorch/debugger/debugger_config.py CHANGED

@@ -1,6 +1,23 @@
-from msprobe.pytorch.common import seed_all
-from msprobe.pytorch.common.log import logger
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import MsprobeException
+from msprobe.pytorch.common.log import logger
 class DebuggerConfig:
@@ -10,30 +27,28 @@ class DebuggerConfig:
         self.rank = common_config.rank if common_config.rank else []
         self.step = common_config.step if common_config.step else []
         self.level = level or common_config.level or "L1"
-        self.seed = common_config.seed if common_config.seed else 1234
-        self.is_deterministic = common_config.is_deterministic
         self.enable_dataloader = common_config.enable_dataloader
         self.scope = task_config.scope if task_config.scope else []
         self.list = task_config.list if task_config.list else []
         self.data_mode = task_config.data_mode if task_config.data_mode else ["all"]
-        self.backward_input_list = task_config.backward_input if task_config.backward_input else []
-        self.backward_input = {}
-        self.acl_config = common_config.acl_config if common_config.acl_config else ""
-        self.is_forward_acl_dump = True
         self.summary_mode = task_config.summary_mode if task_config.summary_mode else Const.STATISTICS
         self.overflow_nums = task_config.overflow_nums if task_config.overflow_nums else 1
         self.framework = Const.PT_FRAMEWORK
+        if self.level == Const.LEVEL_L2:
+            self.is_backward_kernel_dump = False
+            self._check_and_adjust_config_with_l2()
         if self.task == Const.FREE_BENCHMARK:
-            self.fuzz_device = task_config.fuzz_device if task_config.fuzz_device else 'npu'
-            self.handler_type = task_config.handler_type if task_config.handler_type else 'check'
-            self.pert_mode = task_config.pert_mode if task_config.pert_mode else 'improve_precision'
-            self.fuzz_level = task_config.fuzz_level if task_config.fuzz_level else 'L1'
-            self.fuzz_stage = task_config.fuzz_stage if task_config.fuzz_stage else 'forward'
+            self.fuzz_device = task_config.fuzz_device
+            self.handler_type = task_config.handler_type
+            self.pert_mode = task_config.pert_mode
+            self.fuzz_level = task_config.fuzz_level
+            self.fuzz_stage = task_config.fuzz_stage
             self.preheat_config = {
-                "if_preheat": task_config.if_preheat if task_config.if_preheat is not None else True,
-                "preheat_step": task_config.preheat_step if task_config.preheat_step else 15,
-                "max_sample": task_config.max_sample if task_config.max_sample else 20,
+                "if_preheat": task_config.if_preheat,
+                "preheat_step": task_config.preheat_step,
+                "max_sample": task_config.max_sample
             }
         self.online_run_ut = False
@@ -44,52 +59,54 @@ class DebuggerConfig:
             self.tls_path = task_config.tls_path if task_config.tls_path else ""
             self.host = task_config.host if task_config.host else ""
             self.port = task_config.port if task_config.port else -1
+            self.online_run_ut_recompute = task_config.online_run_ut_recompute \
+                if isinstance(task_config.online_run_ut_recompute, bool) else False
         self.check()
-        if self.step:
-            self.step.sort()
-        if self.level == "L2":
-            if not self.scope or not isinstance(self.scope, list) or len(self.scope) != 1:
-                raise ValueError("scope must be configured as a list with one api name")
-            if isinstance(self.scope[0], str) and Const.BACKWARD in self.scope[0] and not self.backward_input_list:
-                raise ValueError("backward_input must be configured when scope contains 'backward'")
-            if Const.BACKWARD in self.scope[0]:
-                self.is_forward_acl_dump = False
-                for index, scope_spec in enumerate(self.scope):
-                    self.scope[index] = scope_spec.replace(Const.BACKWARD, Const.FORWARD)
-                    self.backward_input[self.scope[index]] = self.backward_input_list[index]
-        seed_all(self.seed, self.is_deterministic)
     def check_kwargs(self):
         if self.task and self.task not in Const.TASK_LIST:
-            raise Exception("task is invalid")
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The task <{self.task}> is not in the {Const.TASK_LIST}.")
         if self.level and self.level not in Const.LEVEL_LIST:
-            raise Exception("level is invalid")
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The level <{self.level}> is not in the {Const.LEVEL_LIST}.")
         if not self.dump_path:
-            raise Exception("Invalid dump path, please check your config")
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The dump_path not found.")
     def check(self):
         self.check_kwargs()
-        self._check_rank()
-        self._check_step()
         return True
-    def check_model(self, model):
-        if self.level in ["L0", "mix"] and not model:
-            raise Exception(
-                f"For level {self.level}, PrecisionDebugger must receive a model argument."
-            )
-    def _check_rank(self):
-        if self.rank:
-            for rank_id in self.rank:
-                if not isinstance(rank_id, int) or rank_id < 0:
-                    raise ValueError(f"rank {self.rank} must be an integer and greater than or equal to 0.")
-            else:
-                logger.warning_on_rank_0(f"Rank argument is provided. Only rank {self.rank} data will be dumpped.")
+    def check_model(self, instance, start_model):
+        if self.level not in ["L0", "mix"]:
+            if instance.model is not None or start_model is not None:
+                logger.warning_on_rank_0(
+                    f"The current level is not L0 or mix level, so the model parameters will not be used.")
+            return
+        if start_model is None:
+            if instance.model is None:
+                logger.error_on_rank_0(
+                    f"For level {self.level}, PrecisionDebugger or start interface must receive a 'model' argument.")
+                raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, f"missing the parameter 'model'")
+            return
+        if isinstance(start_model, torch.nn.Module):
+            instance.model = start_model
+        else:
+            logger.error_on_rank_0(f"The 'model' parameter of start must be a torch.nn.Module type.")
+            raise MsprobeException(
+                MsprobeException.INVALID_PARAM_ERROR, f"model must be a torch.nn.Module")
-    def _check_step(self):
-        if self.step:
-            for s in self.step:
-                if not isinstance(s, int) or s < 0:
-                    raise ValueError(f"step element {s} must be an integer and greater than or equal to 0.")
+    def _check_and_adjust_config_with_l2(self):
+        if self.scope:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"When level is set to L2, the scope cannot be configured.")
+        if not self.list or len(self.list) != 1:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"When level is set to L2, the list must be configured as a list with one api name.")
+        api_name = self.list[0]
+        if api_name.endswith(Const.BACKWARD):
+            self.is_backward_kernel_dump = True
+            api_forward_name = api_name[:-len(Const.BACKWARD)] + Const.FORWARD
+            self.list.append(api_forward_name)

msprobe/pytorch/debugger/precision_debugger.py CHANGED

@@ -1,12 +1,34 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from collections import namedtuple
 import torch
-from torch.utils.data import dataloader
-from msprobe.pytorch.debugger.debugger_config import DebuggerConfig
-from msprobe.pytorch.service import Service
-from msprobe.pytorch.common.log import logger
-from msprobe.pytorch.pt_config import parse_json_config
+from msprobe.core.common.const import Const, FileCheckConst, MsgConst
 from msprobe.core.common.exceptions import MsprobeException
-from msprobe.core.common.const import Const
+from msprobe.core.common.file_utils import FileChecker
+from msprobe.core.common.utils import get_real_step_or_rank
+from msprobe.pytorch.common.log import logger
+from msprobe.pytorch.debugger.debugger_config import DebuggerConfig
 from msprobe.pytorch.grad_probe.grad_monitor import GradientMonitor
+from msprobe.pytorch.pt_config import parse_json_config
+from msprobe.pytorch.service import Service
+from torch.utils.data import dataloader
+ConfigParameters = namedtuple("ConfigParameters", ["config_path", "task",
+                                                   "dump_path", "level", "model"])
 class PrecisionDebugger:
@@ -30,20 +52,26 @@ class PrecisionDebugger:
         step=None,
     ):
         if not hasattr(self, "initialized"):
+            config_params = ConfigParameters(config_path,
+                                             task,
+                                             dump_path,
+                                             level,
+                                             model)
+            self.check_input_params(config_params)
             self.api_origin = False
             self.initialized = True
-            self.model = self.check_model_valid(model)
+            self.model = model
             common_config, task_config = parse_json_config(config_path, task)
-            self.task = common_config.task
+            self.task = task if task else common_config.task
             if self.task == Const.GRAD_PROBE:
                 self.gm = GradientMonitor(common_config, task_config)
                 return
             if step:
-                common_config.step = step
+                common_config.step = get_real_step_or_rank(step, Const.STEP)
             self.config = DebuggerConfig(
                 common_config, task_config, task, dump_path, level
             )
-            self.config.check_model(self.model)
             self.service = Service(self.config)
             self.enable_dataloader = self.config.enable_dataloader
             if self.enable_dataloader:
@@ -55,20 +83,40 @@ class PrecisionDebugger:
         return self._instance
     @staticmethod
-    def check_model_valid(model):
-        if not model or isinstance(model, torch.nn.Module):
-            return model
-        raise MsprobeException(
-            MsprobeException.INVALID_PARAM_ERROR, "model 参数必须是torch.nn.Module类型。"
-        )
+    def check_input_params(args):
+        if args.config_path is not None:
+            if not isinstance(args.config_path, str):
+                raise MsprobeException(
+                    MsprobeException.INVALID_PARAM_ERROR, f"config_path must be a string")
+            file_checker = FileChecker(
+                file_path=args.config_path, path_type=FileCheckConst.FILE, file_type=FileCheckConst.JSON_SUFFIX)
+            file_checker.common_check()
+        if args.task is not None and args.task not in Const.TASK_LIST:
+            raise MsprobeException(
+                MsprobeException.INVALID_PARAM_ERROR, f"task must be one of {Const.TASK_LIST}")
+        if args.dump_path is not None:
+            if not isinstance(args.dump_path, str):
+                raise MsprobeException(
+                    MsprobeException.INVALID_PARAM_ERROR, f"dump_path must be a string")
+        if args.level is not None and args.level not in Const.LEVEL_LIST:
+            raise MsprobeException(
+                MsprobeException.INVALID_PARAM_ERROR, f"level must be one of {Const.LEVEL_LIST}")
+        if args.model is not None and not isinstance(args.model, torch.nn.Module):
+            raise MsprobeException(
+                MsprobeException.INVALID_PARAM_ERROR, f"model must be a torch.nn.Module")
     @classmethod
-    def start(cls):
+    def start(cls, model=None):
         instance = cls._instance
+        if not instance:
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
         if instance.task in PrecisionDebugger.tasks_not_need_debugger:
             return
-        if not instance:
-            raise Exception("No instance of PrecisionDebugger found.")
+        instance.config.check_model(instance, model)
         if instance.enable_dataloader:
             logger.warning_on_rank_0("DataLoader is enabled, start() skipped.")
         else:
@@ -85,10 +133,10 @@ class PrecisionDebugger:
     @classmethod
     def stop(cls):
         instance = cls._instance
+        if not instance:
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
         if instance.task in PrecisionDebugger.tasks_not_need_debugger:
             return
-        if not instance:
-            raise Exception("PrecisionDebugger instance is not created.")
         if instance.enable_dataloader:
             logger.warning_on_rank_0("DataLoader is enabled, stop() skipped.")
         else:
@@ -96,16 +144,16 @@ class PrecisionDebugger:
     @classmethod
     def step(cls):
+        if not cls._instance:
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
         if cls._instance.task in PrecisionDebugger.tasks_not_need_debugger:
             return
-        if not cls._instance:
-            raise Exception("PrecisionDebugger instance is not created.")
         cls._instance.service.step()
     @classmethod
     def monitor(cls, model):
         if not cls._instance:
-            raise Exception("PrecisionDebugger instance is not created.")
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
         if cls._instance.task != Const.GRAD_PROBE:
             return
         cls._instance.gm.monitor(model)

msprobe/pytorch/dump/kernel_dump/kernel_config.py ADDED

@@ -0,0 +1,33 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from msprobe.core.common.file_utils import save_json
+def create_kernel_config_json(dump_path, cur_rank):
+    kernel_config_name = "kernel_config.json" if cur_rank == '' else f"kernel_config_{cur_rank}.json"
+    kernel_config_path = os.path.join(dump_path, kernel_config_name)
+    config_info = {
+        "dump": {
+            "dump_list": [],
+            "dump_path": dump_path,
+            "dump_mode": "all",
+            "dump_op_switch": "on"
+        }
+    }
+    save_json(kernel_config_path, config_info, indent=4)
+    return kernel_config_path

msprobe/pytorch/free_benchmark/__init__.py CHANGED

@@ -1,8 +1,23 @@
-from msprobe.pytorch.common.log import logger
-from msprobe.core.common.exceptions import FreeBenchmarkException
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+__all__ = ["FreeBenchmarkCheck", "UnequalRow"]
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import FreeBenchmarkException
+from msprobe.pytorch.common.log import logger
-from .main import FreeBenchmarkCheck
 from .common.params import UnequalRow
-__all__ = [FreeBenchmarkCheck, UnequalRow]
+from .main import FreeBenchmarkCheck

msprobe/pytorch/free_benchmark/common/constant.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Dict
 import numpy as np

msprobe/pytorch/free_benchmark/common/counter.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from collections import defaultdict
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig

msprobe/pytorch/free_benchmark/common/enums.py CHANGED Viewed

@@ -1,3 +1,21 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from msprobe.core.common.const import Const
 class PerturbationMode:
     ADD_NOISE = "add_noise"
     CHANGE_VALUE = "change_value"
@@ -35,3 +53,28 @@ class FuzzLevel:
     BASE_LEVEL = "L1"
     ADV_LEVEL = "L2"
     REAL_LEVEL = "L3"
+class PytorchFreeBenchmarkConst:
+    PERTURBATION_MODE_LIST = [
+        PerturbationMode.ADD_NOISE,
+        PerturbationMode.CHANGE_VALUE,
+        PerturbationMode.IMPROVE_PRECISION,
+        PerturbationMode.NO_CHANGE,
+        PerturbationMode.BIT_NOISE,
+        PerturbationMode.TO_CPU,
+    ]
+    DEFAULT_MODE = PerturbationMode.IMPROVE_PRECISION
+    DEVICE_LIST = [DeviceType.NPU, DeviceType.CPU]
+    DEFAULT_DEVICE = DeviceType.NPU
+    HANDLER_LIST = [HandlerType.CHECK, HandlerType.FIX]
+    DEFAULT_HANDLER = HandlerType.CHECK
+    FUZZ_LEVEL_LIST = [FuzzLevel.BASE_LEVEL]
+    DEFAULT_FUZZ_LEVEL = FuzzLevel.BASE_LEVEL
+    FUZZ_STAGE_LIST = [Const.FORWARD, Const.BACKWARD]
+    FIX_MODE_LIST = [PerturbationMode.IMPROVE_PRECISION, PerturbationMode.TO_CPU]
+    DEFAULT_FUZZ_STAGE = Const.FORWARD
+    DEFAULT_PREHEAT_STEP = 15
+    DEFAULT_MAX_SAMPLE = 20
+    CPU_MODE_LIST = [PerturbationMode.TO_CPU]
+    FIX_STAGE_LIST = [Const.FORWARD]

msprobe/pytorch/free_benchmark/common/params.py CHANGED Viewed

@@ -1,7 +1,23 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Tuple
 import torch
+from msprobe.core.common.exceptions import FreeBenchmarkException
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.enums import (
     DeviceType,
@@ -113,7 +129,13 @@ def make_unequal_row(
         row.max_rel = ratio - 1
     origin_tensor = data_params.original_result
     perturbed_tensor = data_params.perturbed_result
-    if index:
+    if index is not None:
+        if index >= len(origin_tensor) or index >= len(perturbed_tensor):
+            err_msg = f"When generating unequal results, index {index} of output is out of bounds. please check!"
+            raise FreeBenchmarkException(
+                FreeBenchmarkException.OutputIndexError,
+                error_info=err_msg,
+            )
         origin_tensor = origin_tensor[index]
         perturbed_tensor = perturbed_tensor[index]
         row.output_index = index

msprobe/pytorch/free_benchmark/common/utils.py CHANGED Viewed

@@ -1,4 +1,22 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import torch
+from msprobe.core.common.exceptions import FreeBenchmarkException
+from msprobe.core.common.utils import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark.common.enums import DeviceType
@@ -36,6 +54,7 @@ class Tools:
         return api_name.rsplit(".", 2)[0]
     @staticmethod
+    @recursion_depth_decorator("FreeBenchmark: Tools.convert_device_and_dtype")
     def convert_device_and_dtype(
         tensor_seq, device: str = DeviceType.CPU, change_dtype: bool = False
     ):
@@ -58,24 +77,43 @@ class Tools:
         return tensor_seq
     @staticmethod
+    @recursion_depth_decorator("FreeBenchmark: Tools.convert_fuzz_output_to_origin")
     def convert_fuzz_output_to_origin(origin, perturbed):
-        if isinstance(origin, torch.Tensor):
+        if isinstance(origin, torch.Tensor) and isinstance(perturbed, torch.Tensor):
             origin.data = perturbed.to(origin.dtype).to(origin.device)
             return origin
-        if isinstance(origin, dict):
+        if isinstance(origin, dict) and isinstance(perturbed, dict):
             output = dict()
             for key, value in origin.items():
+                if key not in perturbed:
+                    err_msg = f"'{key}' not in perturbed output."
+                    raise FreeBenchmarkException(
+                        FreeBenchmarkException.InvalidPerturbedOutput,
+                        error_info=err_msg,
+                    )
                 output[key] = Tools.convert_fuzz_output_to_origin(value, perturbed[key])
             return output
-        if isinstance(origin, (tuple, list)):
+        if isinstance(origin, (tuple, list)) and isinstance(perturbed, (tuple, list)):
             result = list()
+            if len(perturbed) != len(origin):
+                err_msg = (
+                    f"length of perturbed output ({len(perturbed)}) is different "
+                    f"from the length of original output ({len(origin)})."
+                )
+                raise FreeBenchmarkException(
+                    FreeBenchmarkException.InvalidPerturbedOutput, error_info=err_msg
+                )
             for index_, value in enumerate(origin):
                 result.append(
                     Tools.convert_fuzz_output_to_origin(value, perturbed[index_])
                 )
             return type(origin)(result)
-        return origin
+        err_msg = f"conversion of two outputs with types ({type(origin)}, {type(perturbed)}) is not supported."
+        raise FreeBenchmarkException(
+            FreeBenchmarkException.UnsupportedType, error_info=err_msg
+        )
 class TorchC:
     sum = torch._C._VariableFunctionsClass.sum
     isinf = torch._C._VariableFunctionsClass.isinf

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl

mindstudio-probe 1.0.4py3-none-any.whl → 1.1.1py3-none-any.whl