PyPI - mindstudio-probe - Versions diffs - 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

mindstudio-probe 1.1.0py3-none-any.whl → 1.2.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (299) hide show

{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/METADATA +7 -6
mindstudio_probe-1.2.1.dist-info/RECORD +396 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/entry_points.txt +0 -1
msprobe/CMakeLists.txt +5 -0
msprobe/README.md +51 -20
msprobe/config.json +2 -3
msprobe/core/advisor/advisor.py +8 -3
msprobe/core/common/const.py +264 -15
msprobe/core/common/exceptions.py +27 -3
msprobe/core/common/file_utils.py +176 -26
msprobe/core/common/inplace_op_checker.py +15 -0
msprobe/core/common/inplace_ops.yaml +3 -0
msprobe/core/common/log.py +27 -9
msprobe/core/common/utils.py +204 -77
msprobe/core/common_config.py +49 -14
msprobe/core/compare/acc_compare.py +274 -198
msprobe/core/compare/check.py +32 -33
msprobe/core/compare/compare_cli.py +32 -14
msprobe/core/compare/highlight.py +283 -127
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +246 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +249 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +95 -0
msprobe/core/compare/merge_result/merge_result.py +380 -0
msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
msprobe/core/compare/multiprocessing_compute.py +2 -2
msprobe/core/compare/npy_compare.py +135 -144
msprobe/core/compare/utils.py +419 -274
msprobe/core/data_dump/data_collector.py +60 -28
msprobe/core/data_dump/data_processor/base.py +84 -36
msprobe/core/data_dump/data_processor/factory.py +5 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +152 -18
msprobe/core/data_dump/data_processor/pytorch_processor.py +267 -110
msprobe/core/data_dump/json_writer.py +29 -1
msprobe/core/data_dump/scope.py +119 -39
msprobe/core/grad_probe/constant.py +27 -13
msprobe/core/grad_probe/grad_compare.py +18 -1
msprobe/core/grad_probe/utils.py +30 -2
msprobe/core/overflow_check/abnormal_scene.py +189 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +96 -7
msprobe/docs/02.config_introduction.md +50 -23
msprobe/docs/03.config_examples.md +2 -9
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +93 -61
msprobe/docs/06.data_dump_MindSpore.md +200 -95
msprobe/docs/07.accuracy_checker_PyTorch.md +28 -28
msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +114 -50
msprobe/docs/11.accuracy_compare_MindSpore.md +340 -48
msprobe/docs/12.overflow_check_PyTorch.md +2 -2
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
msprobe/docs/17.grad_probe.md +5 -6
msprobe/docs/19.monitor.md +561 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +466 -0
msprobe/docs/22.visualization_MindSpore.md +481 -0
msprobe/docs/23.generate_operator_PyTorch.md +107 -0
msprobe/docs/24.code_mapping_Mindspore.md +28 -0
msprobe/docs/25.tool_function_introduction.md +29 -0
msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
msprobe/docs/27.dump_json_instruction.md +521 -0
msprobe/docs/FAQ.md +29 -2
msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
msprobe/docs/img/visualization/tensorboard_1.png +0 -0
msprobe/docs/img/visualization/tensorboard_2.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_browser_2.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/docs/visualization/GPTModel.png +0 -0
msprobe/docs/visualization/ParallelMLP.png +0 -0
msprobe/docs/visualization/layer_mapping_example.md +132 -0
msprobe/docs/visualization/mapping.png +0 -0
msprobe/docs/visualization/mapping1.png +0 -0
msprobe/docs/visualization/module_name.png +0 -0
msprobe/docs/visualization/module_name1.png +0 -0
msprobe/docs/visualization/no_mapping.png +0 -0
msprobe/docs/visualization/no_mapping1.png +0 -0
msprobe/docs/visualization/no_mapping_analyze.png +0 -0
msprobe/docs/visualization/top_layer.png +0 -0
msprobe/mindspore/__init__.py +25 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -151
msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +64 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +64 -31
msprobe/mindspore/api_accuracy_checker/data_manager.py +301 -0
msprobe/mindspore/api_accuracy_checker/main.py +28 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +212 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +60 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +33 -12
msprobe/mindspore/code_mapping/bind.py +264 -0
msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
msprobe/mindspore/code_mapping/graph.py +49 -0
msprobe/mindspore/code_mapping/graph_parser.py +226 -0
msprobe/mindspore/code_mapping/main.py +24 -0
msprobe/mindspore/code_mapping/processor.py +34 -0
msprobe/mindspore/common/const.py +35 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +88 -4
msprobe/mindspore/compare/distributed_compare.py +22 -24
msprobe/mindspore/compare/ms_compare.py +333 -268
msprobe/mindspore/compare/ms_graph_compare.py +95 -52
msprobe/mindspore/debugger/debugger_config.py +7 -1
msprobe/mindspore/debugger/precision_debugger.py +87 -12
msprobe/mindspore/dump/dump_tool_factory.py +3 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +95 -18
msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +45 -30
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +36 -1
msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
msprobe/mindspore/dump/jit_dump.py +17 -5
msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
msprobe/mindspore/dump/kernel_graph_dump.py +9 -4
msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +156 -41
msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
msprobe/mindspore/free_benchmark/common/utils.py +19 -4
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
msprobe/mindspore/grad_probe/global_context.py +28 -8
msprobe/mindspore/grad_probe/grad_analyzer.py +50 -24
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +35 -12
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/mindtorch/__init__.py +18 -0
msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
msprobe/mindspore/ms_config.py +27 -16
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +9 -4
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +285 -113
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +48 -10
msprobe/pytorch/__init__.py +8 -6
msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +103 -271
msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +478 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +63 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +21 -15
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +54 -22
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +140 -71
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +49 -8
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +142 -16
msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
msprobe/pytorch/bench_functions/swiglu.py +10 -2
msprobe/pytorch/common/parse_json.py +7 -6
msprobe/pytorch/common/utils.py +101 -7
msprobe/pytorch/compare/distributed_compare.py +17 -30
msprobe/pytorch/compare/pt_compare.py +44 -22
msprobe/pytorch/debugger/debugger_config.py +46 -27
msprobe/pytorch/debugger/precision_debugger.py +42 -12
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
msprobe/pytorch/{module_processer.py → dump/module_dump/module_processer.py} +81 -10
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +15 -0
msprobe/pytorch/free_benchmark/common/params.py +10 -2
msprobe/pytorch/free_benchmark/common/utils.py +29 -4
msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -5
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +41 -47
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/__init__.py +1 -1
msprobe/pytorch/hook_module/hook_module.py +14 -11
msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +35 -0
msprobe/pytorch/hook_module/wrap_distributed.py +6 -8
msprobe/pytorch/hook_module/wrap_functional.py +0 -38
msprobe/pytorch/monitor/__init__.py +0 -0
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +425 -0
msprobe/pytorch/monitor/csv2tb.py +166 -0
msprobe/pytorch/monitor/distributed/__init__.py +0 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +283 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +1076 -0
msprobe/pytorch/monitor/module_metric.py +172 -0
msprobe/pytorch/monitor/module_spec_verifier.py +95 -0
msprobe/pytorch/monitor/optimizer_collect.py +333 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +160 -0
msprobe/pytorch/monitor/utils.py +321 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +29 -38
msprobe/pytorch/online_dispatch/dispatch.py +58 -27
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +53 -32
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
msprobe/pytorch/online_dispatch/utils.py +49 -21
msprobe/pytorch/parse_tool/lib/compare.py +21 -27
msprobe/pytorch/parse_tool/lib/config.py +6 -8
msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
msprobe/pytorch/parse_tool/lib/parse_tool.py +12 -12
msprobe/pytorch/parse_tool/lib/utils.py +33 -53
msprobe/pytorch/parse_tool/lib/visualization.py +11 -10
msprobe/pytorch/pt_config.py +31 -8
msprobe/pytorch/service.py +188 -108
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +222 -0
msprobe/visualization/builder/msprobe_adapter.py +227 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +180 -0
msprobe/visualization/compare/mode_adapter.py +197 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +119 -0
msprobe/visualization/graph/distributed_analyzer.py +318 -0
msprobe/visualization/graph/graph.py +209 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +288 -0
msprobe/visualization/utils.py +217 -0
mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
msprobe/docs/04.acl_config_examples.md +0 -78
msprobe/mindspore/compare/layer_mapping.py +0 -146
msprobe/mindspore/compare/modify_mapping.py +0 -107
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
msprobe/pytorch/functional/module_dump.py +0 -84
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/top_level.txt +0 -0
/msprobe/mindspore/{free_benchmark/decorator → code_mapping}/__init__.py +0 -0
/msprobe/pytorch/{functional → dump/module_dump}/__init__.py +0 -0

msprobe/mindspore/dump/jit_dump.py CHANGED Viewed

@@ -20,7 +20,7 @@ from mindspore import Tensor
 from mindspore._c_expression import PyNativeExecutor_
 from mindspore.common.api import _MindsporeFunctionExecutor
-from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+from msprobe.core.common.log import logger
 from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs
 from msprobe.core.common.const import Const
 from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs
@@ -33,6 +33,8 @@ def dump_jit(name, in_feat, out_feat, is_forward):
     index = ori_args.find("<")
     if index != 0 and index != -1:
         result = ori_args[0:index]
+    elif name is not None and "<" not in str(name):
+        result = str(name)
     else:
         result = "JitFunction"
     if JitDump.need_dump():
@@ -47,7 +49,7 @@ def dump_jit(name, in_feat, out_feat, is_forward):
             name_template = Const.JIT + Const.SEP + result + Const.SEP + str(JitDump.jit_count[result]) + Const.SEP + \
                             Const.BACKWARD
             JitDump.data_collector.update_api_or_module_name(name_template)
-            module_input_output = ModuleBackwardInputsOutputs(grad_input=in_feat ,grad_output=out_feat)
+            module_input_output = ModuleBackwardInputsOutputs(grad_input=in_feat, grad_output=out_feat)
             JitDump.data_collector.backward_data_collect(name_template, None, pid, module_input_output)
@@ -59,15 +61,25 @@ class JitDump(_MindsporeFunctionExecutor):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.name = None
+        if len(args) > 0:
+            self.name = args[0].__name__
         self._executor = PyNativeExecutor_.get_instance()
     def __call__(self, *args, **kwargs):
-        api_register.api_set_ori_func()
+        if JitDump.jit_dump_switch:
+            api_register.api_set_ori_func()
         out = super().__call__(*args, **kwargs)
         if JitDump.jit_dump_switch and len(args) > 0:
-            dump_jit(args[0], args, out, True)
+            if self.name and self.name != "construct":
+                dump_jit(self.name, args, out, True)
+            else:
+                dump_jit(args[0], args, out, True)
             JitDump.jit_enable = True
-        api_register.api_set_hook_func()
+        elif len(args) == 0:
+            logger.warning(f"The jit function {self.name} has no input arguments, nothing will be dumped.")
+        if JitDump.jit_dump_switch:
+            api_register.api_set_hook_func()
         return out
     @classmethod

msprobe/mindspore/dump/kernel_dump/kernel_config.py ADDED Viewed

@@ -0,0 +1,33 @@
+# Copyright (c) 2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from msprobe.core.common.file_utils import save_json
+def create_kernel_config_json(dump_path, cur_rank):
+    kernel_config_name = "kernel_config.json" if cur_rank == '' else f"kernel_config_{cur_rank}.json"
+    kernel_config_path = os.path.join(dump_path, kernel_config_name)
+    config_info = {
+        "dump": {
+            "dump_list": [],
+            "dump_path": dump_path,
+            "dump_mode": "all",
+            "dump_op_switch": "on"
+        }
+    }
+    save_json(kernel_config_path, config_info, indent=4)
+    return kernel_config_path

msprobe/mindspore/dump/kernel_graph_dump.py CHANGED Viewed

@@ -13,10 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import os
-from msprobe.core.common.file_utils import FileOpen, create_directory
+from msprobe.core.common.file_utils import create_directory, save_json
 from msprobe.mindspore.common.log import logger
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
@@ -57,13 +56,19 @@ class KernelGraphDump:
                 self.dump_json["common_dump_settings"]["input_output"] = 2
     def handle(self):
+        try:
+            from msprobe.lib import _msprobe_c
+            return
+        except ImportError:
+            # If _msprobe_c is not available, fall back to the legacy MindSpore flow
+            logger.info("Module _msprobe_c has not been installed, use interface in mindspore instead.")
         if os.getenv("GRAPH_OP_RUN") == "1":
             raise Exception("Must run in graph mode, not kbk mode")
         json_path = self.dump_json["common_dump_settings"]["path"]
         create_directory(json_path)
         json_path = os.path.join(json_path, "kernel_graph_dump.json")
-        with FileOpen(json_path, 'w') as f:
-            json.dump(self.dump_json, f)
+        save_json(json_path, self.dump_json, indent=4)
         logger.info(json_path + " has been created.")
         os.environ["MINDSPORE_DUMP_CONFIG"] = json_path
         if self.dump_json["common_dump_settings"]["dump_mode"] == 0:

msprobe/mindspore/dump/kernel_kbyk_dump.py CHANGED Viewed

@@ -13,11 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import os
 from msprobe.core.common.const import Const
-from msprobe.core.common.file_utils import FileOpen, create_directory
+from msprobe.core.common.file_utils import create_directory, save_json
 from msprobe.mindspore.common.log import logger
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
@@ -70,8 +69,7 @@ class KernelKbykDump:
         json_path = self.dump_json[KernelKbykDump.COMMON_SETTINGS]["path"]
         create_directory(json_path)
         json_path = os.path.join(json_path, "kernel_kbyk_dump.json")
-        with FileOpen(json_path, 'w') as f:
-            json.dump(self.dump_json, f)
+        save_json(json_path, self.dump_json, indent=4)
         logger.info(json_path + " has been created.")
         os.environ["MINDSPORE_DUMP_CONFIG"] = json_path

msprobe/mindspore/dym_loader/hook_dynamic_loader.cc ADDED Viewed

@@ -0,0 +1,140 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hook_dynamic_loader.h"
+#include <sys/stat.h>
+#include <cstdlib>
+#include <cstring>
+#include "utils/log_adapter.h"
+namespace {
+// Returns true when `path` names an existing regular file (not a symbolic
+// link) whose name ends in ".so". Each failed check is logged at ERROR level
+// and makes the function return false.
+bool IsValidPath(const std::string &path) {
+  struct stat fileStat;
+  // Use lstat rather than stat: stat follows symbolic links, so S_ISLNK could
+  // never be true for its result and the link check below would be dead code.
+  if (lstat(path.c_str(), &fileStat) != 0) {
+    MS_LOG(ERROR) << "File does not exist or cannot be accessed: " << path;
+    return false;
+  }
+  if (S_ISLNK(fileStat.st_mode)) {
+    MS_LOG(ERROR) << "File is a symbolic link, which is not allowed: " << path;
+    return false;
+  }
+  if (!S_ISREG(fileStat.st_mode)) {
+    MS_LOG(ERROR) << "File is not a regular file: " << path;
+    return false;
+  }
+  // Compare the trailing characters directly. Slicing from find_last_of(".")
+  // throws std::out_of_range when the path contains no '.' (npos > size()).
+  const std::string soSuffix = ".so";
+  const bool hasSoSuffix = path.size() >= soSuffix.size() &&
+                           path.compare(path.size() - soSuffix.size(), soSuffix.size(), soSuffix) == 0;
+  if (!hasSoSuffix) {
+    MS_LOG(ERROR) << "File is not a .so file: " << path;
+    return false;
+  }
+  return true;
+}
+}  // namespace
+// Returns a reference to the single function-local static HookDynamicLoader.
+HookDynamicLoader &HookDynamicLoader::GetInstance() {
+  static HookDynamicLoader instance;
+  return instance;
+}
+// Resolves `functionName` in the library identified by `handle` via dlsym and
+// records the address in funcMap_. Returns false, after logging a warning,
+// when dlsym yields a null pointer.
+bool HookDynamicLoader::loadFunction(void *handle, const std::string &functionName) {
+  void *func = dlsym(handle, functionName.c_str());
+  if (!func) {
+    // dlerror() returns a null pointer when no error is pending (for example
+    // when the symbol exists but its value is null); streaming a null char*
+    // is undefined behaviour, so substitute a placeholder.
+    const char *errMsg = dlerror();
+    MS_LOG(WARNING) << "Could not load function: " << functionName
+                    << ", error: " << (errMsg != nullptr ? errMsg : "unknown error");
+    return false;
+  }
+  funcMap_[functionName] = func;
+  return true;
+}
+// Canonicalises `libPath` with realpath and runs IsValidPath on the result.
+// Returns false, after logging a warning, when the path cannot be resolved.
+bool HookDynamicLoader::validateLibraryPath(const std::string &libPath) {
+  char *resolved = realpath(libPath.c_str(), nullptr);
+  if (resolved == nullptr) {
+    MS_LOG(WARNING) << "Failed to resolve realpath for the library: " << libPath;
+    return false;
+  }
+  // Copy into a std::string so the buffer allocated by realpath can be
+  // released before the checks run.
+  const std::string canonicalPath(resolved);
+  free(resolved);
+  return IsValidPath(canonicalPath);
+}
+// Loads the hook library named by the HOOK_TOOL_PATH environment variable and
+// resolves every symbol listed in functionList_. Returns true only when the
+// library and all symbols were loaded. If a symbol cannot be resolved the
+// library is closed again and no partial symbol table is kept. Returns false
+// without touching the current state when a library is already loaded.
+bool HookDynamicLoader::LoadLibrary() {
+  const char *libPath = std::getenv("HOOK_TOOL_PATH");
+  if (!libPath) {
+    MS_LOG(WARNING) << "HOOK_TOOL_PATH is not set!";
+    return false;
+  }
+  // Resolve the path up front so that the file which gets validated is the
+  // file which gets opened: dlopen searches the library path for names
+  // without a '/', whereas realpath resolves them against the current
+  // directory. If resolution fails the raw value is kept and the validation
+  // below reports the failure.
+  std::string resolvedLibPath(libPath);
+  if (char *realPath = realpath(libPath, nullptr)) {
+    resolvedLibPath = realPath;
+    free(realPath);  // Free memory allocated by realpath
+  }
+  if (!validateLibraryPath(resolvedLibPath)) {
+    MS_LOG(WARNING) << "Library path validation failed.";
+    return false;
+  }
+  std::lock_guard<std::mutex> lock(mutex_);
+  if (handle_) {
+    MS_LOG(WARNING) << "Hook library already loaded!";
+    return false;
+  }
+  handle_ = dlopen(resolvedLibPath.c_str(), RTLD_LAZY | RTLD_LOCAL);
+  if (!handle_) {
+    // Guard against a null dlerror() result before streaming it.
+    const char *errMsg = dlerror();
+    MS_LOG(WARNING) << "Failed to load Hook library: " << (errMsg != nullptr ? errMsg : "unknown error");
+    return false;
+  }
+  for (const auto &functionName : functionList_) {
+    if (!loadFunction(handle_, functionName)) {
+      MS_LOG(WARNING) << "Failed to load function: " << functionName;
+      // Drop the symbols resolved so far: their addresses dangle once the
+      // library is closed and must not be handed out by GetHooker.
+      funcMap_.clear();
+      dlclose(handle_);
+      handle_ = nullptr;
+      return false;
+    }
+  }
+  MS_LOG(INFO) << "Hook library loaded successfully.";
+  return true;
+}
+// Closes the hook library and forgets every resolved symbol. Returns false,
+// after logging a warning, when no library is currently loaded.
+bool HookDynamicLoader::UnloadLibrary() {
+  std::lock_guard<std::mutex> guard(mutex_);
+  if (handle_ == nullptr) {
+    MS_LOG(WARNING) << "Hook library hasn't been loaded.";
+    return false;
+  }
+  // Forget the symbol addresses together with the library they point into.
+  funcMap_.clear();
+  dlclose(handle_);
+  handle_ = nullptr;
+  MS_LOG(INFO) << "Library unloaded successfully.";
+  return true;
+}
+// Looks up the address recorded for `funcName` in funcMap_. Returns nullptr,
+// after logging a warning, when no such entry exists.
+void *HookDynamicLoader::GetHooker(const std::string &funcName) {
+  std::lock_guard<std::mutex> guard(mutex_);
+  const auto entry = funcMap_.find(funcName);
+  if (entry != funcMap_.end()) {
+    return entry->second;
+  }
+  MS_LOG(WARNING) << "Function not found: " << funcName;
+  return nullptr;
+}

msprobe/mindspore/dym_loader/hook_dynamic_loader.h ADDED Viewed

@@ -0,0 +1,53 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HOOK_DYNAMIC_LOADER_H
+#define HOOK_DYNAMIC_LOADER_H
+#include <dlfcn.h>
+#include <string>
+#include <vector>
+#include <map>
+#include <mutex>
+constexpr auto kHookBegin = "MS_DbgOnStepBegin";
+constexpr auto kHookEnd = "MS_DbgOnStepEnd";
+// Singleton that opens a hook shared library with dlopen and keeps the
+// addresses of the symbols named in functionList_. Copying is disabled.
+class HookDynamicLoader {
+ public:
+  // Returns the single loader instance.
+  static HookDynamicLoader &GetInstance();
+  HookDynamicLoader(const HookDynamicLoader &) = delete;
+  HookDynamicLoader &operator=(const HookDynamicLoader &) = delete;
+  // Loads the library named by the HOOK_TOOL_PATH environment variable and
+  // resolves every entry of functionList_; returns false on any failure.
+  bool LoadLibrary();
+  // Closes the loaded library and clears the symbol table; returns false when
+  // nothing is loaded.
+  bool UnloadLibrary();
+  // Returns the address recorded for funcName, or nullptr when it is unknown.
+  void *GetHooker(const std::string &funcName);
+ private:
+  // Helper functions
+  // Resolves one symbol with dlsym and stores it in funcMap_.
+  bool loadFunction(void *handle, const std::string &functionName);
+  // Resolves libPath with realpath and checks that it is an acceptable file.
+  bool validateLibraryPath(const std::string &libPath);
+  HookDynamicLoader() = default;
+  // Handle returned by dlopen; nullptr while no library is loaded.
+  void *handle_ = nullptr;
+  // Names of the symbols LoadLibrary resolves.
+  std::vector<std::string> functionList_ = {kHookBegin, kHookEnd};
+  // Symbol name -> address obtained from dlsym.
+  std::map<std::string, void *> funcMap_;
+  // Held by LoadLibrary, UnloadLibrary and GetHooker while they access
+  // handle_ and funcMap_.
+  std::mutex mutex_;
+};
+#endif  // HOOK_DYNAMIC_LOADER_H

msprobe/mindspore/free_benchmark/api_pynative_self_check.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
-# Licensed under the Apache License, Version 2.0  (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -13,24 +13,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import functools
 import importlib
-import inspect
 import os
+import traceback
 import mindspore as ms
-from mindspore.communication import comm_func
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import DistributedNotInitializedError
 from msprobe.core.common.file_utils import check_path_length, load_yaml
 from msprobe.mindspore.common.const import Const as MsConst
 from msprobe.mindspore.common.const import FreeBenchmarkConst
 from msprobe.mindspore.common.log import logger
+from msprobe.mindspore.common.utils import get_rank_if_initialized
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
+from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
 from msprobe.mindspore.free_benchmark.common.config import Config
-from msprobe.mindspore.free_benchmark.decorator.decorator_factory import decorate_forward_function
+from msprobe.mindspore.free_benchmark.common.handler_params import HandlerParams
+from msprobe.mindspore.free_benchmark.common.utils import Tools
+from msprobe.mindspore.free_benchmark.handler.handler_factory import HandlerFactory
+from msprobe.mindspore.free_benchmark.perturbation.perturbation_factory import PerturbationFactory
+from msprobe.mindspore.runtime import Runtime
-class ApiPyNativeSelFCheck:
+class ApiPyNativeSelfCheck:
     def __init__(self, config: DebuggerConfig):
         Config.is_enable = True
         Config.handler_type = config.handler_type
@@ -39,29 +46,77 @@ class ApiPyNativeSelFCheck:
         Config.dump_level = config.dump_level
         Config.steps = config.step
         Config.ranks = config.rank
-        Config.dump_path = os.path.join(config.dump_path, "free_benchmark.csv")
+        Config.dump_path = os.path.join(config.dump_path, FreeBenchmarkConst.CHECK_RESULT_FILE)
         check_path_length(Config.dump_path)
+        self.ori_func = {}
         self.api_list = config.list
         all_api = get_supported_ops()
         if not self.api_list:
             self.api_list = all_api
         else:
             self.api_list = set(self.api_list) & all_api
+        self.store_original_func()
     def handle(self):
+        api_register.initialize_hook(self.build_hook)
+        api_register.api_set_hook_func()
+    def build_hook(self, api_name):
+        def pre_hook(cell, input_data):
+            return None
+        def forward_hook(api_name_with_id, cell, input_data, output_data):
+            ret = None
+            if not need_wrapper_func():
+                del cell.input_kwargs
+                return ret
+            api_name_with_id = api_name_with_id[:-1]
+            hook_prefix = api_name_with_id[:api_name_with_id.find(Const.SEP) + 1]
+            api_name = (MsConst.HOOK_MS_PREFIX_DICT.get(hook_prefix, "") +
+                        api_name_with_id[api_name_with_id.find(Const.SEP) + 1:api_name_with_id.rfind(Const.SEP)])
+            if api_name in self.api_list:
+                ret = check_self(api_name_with_id, output_data, self.ori_func.get(api_name),
+                                 *input_data, **cell.input_kwargs)
+            del cell.input_kwargs
+            return ret
+        def backward_hook(cell, grad_input, grad_output):
+            pass
+        HOOKCell.get_cell_count(api_name)
+        api_name_with_id = api_name + str(HOOKCell.get_cell_count(api_name)) + Const.SEP
+        forward_hook = functools.partial(forward_hook, api_name_with_id)
+        HOOKCell.add_cell_count(api_name)
+        def wrap_forward_hook(cell, input_data, output_data):
+            return forward_hook(cell, input_data, output_data)
+        def wrap_backward_hook(cell, grad_input, grad_output):
+            return backward_hook(cell, grad_input, grad_output)
+        def pre_backward_hook(cell, grad_input):
+            return None
+        return pre_hook, wrap_forward_hook, wrap_backward_hook, pre_backward_hook
+    def store_original_func(self):
         for api_name in self.api_list:
-            hijack(api_name)
+            self.ori_func[api_name] = get_module(api_name)[1]
 def get_supported_ops():
     supported_ops = []
     cur_path = os.path.dirname(os.path.realpath(__file__))
-    yaml_path = os.path.join(cur_path, "data", "support_wrap_ops.yaml")
+    yaml_path = os.path.join(cur_path, "data", FreeBenchmarkConst.SUPPORTED_CHECK_API_FILE)
-    yaml_data = load_yaml(yaml_path)
+    supported_ops_list = load_yaml(yaml_path)
     for k, v in FreeBenchmarkConst.API_PREFIX_DICT.items():
-        ops = yaml_data.get(k)
+        ops = supported_ops_list.get(k)
         if ops:
             ops = [v + i for i in ops]
             supported_ops += ops
@@ -72,7 +127,7 @@ def get_supported_ops():
     _all_functional_ops += ms_ops
     ms_tensor = dir(ms.Tensor)
-    ms_tensor = [MsConst.Tensor_PREFIX + i for i in ms_tensor]
+    ms_tensor = [MsConst.TENSOR_PREFIX + i for i in ms_tensor]
     _all_functional_ops += ms_tensor
     ms_mint = dir(ms.mint)
@@ -83,49 +138,109 @@ def get_supported_ops():
     ms_mint_nn_func = [MsConst.MINT_NN_FUNC_PREFIX + i for i in ms_mint_nn_func]
     _all_functional_ops += ms_mint_nn_func
-    ms_communication = dir(comm_func)
-    ms_communication = [MsConst.COMM_PREFIX + i for i in ms_communication]
-    _all_functional_ops += ms_communication
     return set(supported_ops) & set(_all_functional_ops)
-def get_decorate_func():
-    return decorate_forward_function
-def is_func_support_decorate(orig_func):
-    return not inspect.isclass(orig_func) and callable(orig_func)
-def get_wrapper_obj(orig_func, api_name):
-    if is_func_support_decorate(orig_func):
-        wrapped_obj = get_decorate_func()(orig_func, api_name)
-    else:
-        wrapped_obj = orig_func
-    return wrapped_obj
 def get_module(api_name):
     func_name_list = api_name.split(Const.SEP)
     func_name = func_name_list[-1]
     module_obj = importlib.import_module(func_name_list[0])
     for i, module_name in enumerate(func_name_list[1:-1]):
         if not hasattr(module_obj, module_name):
-            importlib.import_module(f"{Const.SEP.join(func_name_list[:i+2])}")
+            importlib.import_module(f"{Const.SEP.join(func_name_list[:i + 2])}")
         module_obj = getattr(module_obj, module_name)
     orig_func = getattr(module_obj, func_name)
     return module_obj, orig_func
-def hijack(api_name):
-    if not api_name.strip():
-        return
+def check_self(api_name_with_id, output, ori_func, *args, **kwargs):
+    ret = None
+    if Config.stage == Const.BACKWARD and not (check_all_tensor(args) and check_all_tensor(output)):
+        logger.warning(f"{api_name_with_id} has non-tensor input or output.")
+        return ret
+    params = data_pre_deal(api_name_with_id, ori_func, *args, **kwargs)
+    if params.index == -1:
+        return ret
+    logger.info(f"[{api_name_with_id}] is {Config.handler_type}ing.")
+    api_register.api_set_ori_func()
     try:
-        func_name = api_name.split(Const.SEP)[-1]
-        module_obj, origin_func = get_module(api_name)
-        wrapped_obj = get_wrapper_obj(origin_func, api_name)
-        setattr(module_obj, func_name, wrapped_obj)
+        perturbation = PerturbationFactory.create(api_name_with_id)
+        params.fuzzed_result = perturbation.handle(params)
+        if params.fuzzed_result is False:
+            api_register.api_set_hook_func()
+            return ret
+        if Config.stage == Const.BACKWARD:
+            params.original_result = Tools.get_grad(params.original_func, *params.args, **params.kwargs)
+        else:
+            params.original_result = output
+        ret = deal_fuzzed_and_original_result(api_name_with_id, params)
     except Exception as e:
-        logger.error(f"Failed decorator {api_name}: {e}")
+        logger.error(f"[{api_name_with_id}] Error: {str(e)}")
+        logger.error(f"[{api_name_with_id}] Error detail: {traceback.format_exc()}")
+    api_register.api_set_hook_func()
+    return ret
+def check_all_tensor(input_output):
+    if isinstance(input_output, ms.Tensor):
+        return True
+    if isinstance(input_output, (tuple, list)):
+        return all([check_all_tensor(v) for v in input_output])
+    return False
+def get_target_arg_index(args) -> int:
+    """
+    类型校验
+    """
+    for i, arg in enumerate(args):
+        if ms.ops.is_tensor(arg):
+            if not ms.ops.is_floating_point(arg):
+                continue
+            return i
+        if isinstance(arg, (list, tuple, dict)):
+            return i
+    return -1
+def data_pre_deal(api_name_with_id, func, *args, **kwargs):
+    params = HandlerParams()
+    params.args = args
+    params.kwargs = kwargs
+    params.original_func = func
+    index = get_target_arg_index(args)
+    if index == -1:
+        logger.warning(f"{api_name_with_id} has no supported input type.")
+    params.index = index
+    return params
+def need_wrapper_func():
+    if not (Runtime.is_running and Config.is_enable):
+        return False
+    if Config.steps and Runtime.step_count not in Config.steps:
+        return False
+    if Runtime.rank_id == -1:
+        try:
+            Runtime.rank_id = get_rank_if_initialized()
+        except DistributedNotInitializedError:
+            Runtime.rank_id = -1
+    if Config.ranks and Runtime.rank_id != -1 and Runtime.rank_id not in Config.ranks:
+        return False
+    return True
+def deal_fuzzed_and_original_result(api_name_with_id, params: HandlerParams):
+    handler = HandlerFactory.create(api_name_with_id)
+    result = handler.handle(params)
+    return result

msprobe/mindspore/free_benchmark/common/handler_params.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
-# Licensed under the Apache License, Version 2.0  (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -27,6 +27,5 @@ class HandlerParams:
     original_result: Optional[Any] = None
     fuzzed_result: Optional[Any] = None
     is_consistent: Optional[bool] = True
-    save_flag: Optional[bool] = True
     fuzzed_value: Optional[Any] = None
     original_func: Optional[Callable] = None

msprobe/mindspore/free_benchmark/common/utils.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
-# Licensed under the Apache License, Version 2.0  (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -17,7 +17,7 @@ from dataclasses import dataclass
 from typing import Any, Optional
 import mindspore as ms
-from mindspore import Tensor
+from mindspore import Tensor, ops
 from msprobe.mindspore.common.const import FreeBenchmarkConst
 from msprobe.mindspore.free_benchmark.common.config import Config
@@ -43,6 +43,23 @@ class Tools:
             return FreeBenchmarkConst.NO_CHANGE_ERROR_THRESHOLD
         return FreeBenchmarkConst.ERROR_THRESHOLD.get(dtype, FreeBenchmarkConst.ERROR_THRESHOLD.get(ms.float32))
+    @staticmethod
+    def get_grad_out(outputs):
+        if isinstance(outputs, Tensor):
+            return ops.ones_like(outputs)
+        if isinstance(outputs, (tuple, list)):
+            return type(outputs)([Tools.get_grad_out(v) for v in outputs])
+        return outputs
+    @staticmethod
+    def get_grad(func, *args, **kwargs):
+        def target_func(*inputs):
+            return func(*inputs, **kwargs)
+        outputs, vjp_fn = ms.vjp(target_func, *args)
+        values = Tools.get_grad_out(outputs)
+        return vjp_fn(values)
 @dataclass
 class UnequalRow:
@@ -73,10 +90,8 @@ def make_unequal_row(
     if isinstance(ratio, float):
         row.max_rel = ratio - 1
     original_tensor = params.original_result
-    fuzzed_tensor = params.fuzzed_result
     if index is not None:
         original_tensor = original_tensor[index]
-        fuzzed_tensor = fuzzed_tensor[index]
         row.output_index = index
     if isinstance(original_tensor, Tensor):
         row.dtype = original_tensor.dtype

mindstudio-probe 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

mindstudio-probe 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl