mindstudio-probe 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/METADATA +7 -6
  2. mindstudio_probe-1.2.1.dist-info/RECORD +396 -0
  3. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/WHEEL +1 -1
  4. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/entry_points.txt +0 -1
  5. msprobe/CMakeLists.txt +5 -0
  6. msprobe/README.md +51 -20
  7. msprobe/config.json +2 -3
  8. msprobe/core/advisor/advisor.py +8 -3
  9. msprobe/core/common/const.py +264 -15
  10. msprobe/core/common/exceptions.py +27 -3
  11. msprobe/core/common/file_utils.py +176 -26
  12. msprobe/core/common/inplace_op_checker.py +15 -0
  13. msprobe/core/common/inplace_ops.yaml +3 -0
  14. msprobe/core/common/log.py +27 -9
  15. msprobe/core/common/utils.py +204 -77
  16. msprobe/core/common_config.py +49 -14
  17. msprobe/core/compare/acc_compare.py +274 -198
  18. msprobe/core/compare/check.py +32 -33
  19. msprobe/core/compare/compare_cli.py +32 -14
  20. msprobe/core/compare/highlight.py +283 -127
  21. msprobe/core/compare/layer_mapping/__init__.py +19 -0
  22. msprobe/core/compare/layer_mapping/data_scope_parser.py +246 -0
  23. msprobe/core/compare/layer_mapping/layer_mapping.py +249 -0
  24. msprobe/core/compare/layer_mapping/postprocess_pass.py +95 -0
  25. msprobe/core/compare/merge_result/merge_result.py +380 -0
  26. msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
  27. msprobe/core/compare/multiprocessing_compute.py +2 -2
  28. msprobe/core/compare/npy_compare.py +135 -144
  29. msprobe/core/compare/utils.py +419 -274
  30. msprobe/core/data_dump/data_collector.py +60 -28
  31. msprobe/core/data_dump/data_processor/base.py +84 -36
  32. msprobe/core/data_dump/data_processor/factory.py +5 -3
  33. msprobe/core/data_dump/data_processor/mindspore_processor.py +152 -18
  34. msprobe/core/data_dump/data_processor/pytorch_processor.py +267 -110
  35. msprobe/core/data_dump/json_writer.py +29 -1
  36. msprobe/core/data_dump/scope.py +119 -39
  37. msprobe/core/grad_probe/constant.py +27 -13
  38. msprobe/core/grad_probe/grad_compare.py +18 -1
  39. msprobe/core/grad_probe/utils.py +30 -2
  40. msprobe/core/overflow_check/abnormal_scene.py +189 -0
  41. msprobe/core/overflow_check/api_info.py +55 -0
  42. msprobe/core/overflow_check/checker.py +138 -0
  43. msprobe/core/overflow_check/filter.py +157 -0
  44. msprobe/core/overflow_check/ignore_rules.yaml +55 -0
  45. msprobe/core/overflow_check/level.py +22 -0
  46. msprobe/core/overflow_check/utils.py +28 -0
  47. msprobe/docs/01.installation.md +96 -7
  48. msprobe/docs/02.config_introduction.md +50 -23
  49. msprobe/docs/03.config_examples.md +2 -9
  50. msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
  51. msprobe/docs/05.data_dump_PyTorch.md +93 -61
  52. msprobe/docs/06.data_dump_MindSpore.md +200 -95
  53. msprobe/docs/07.accuracy_checker_PyTorch.md +28 -28
  54. msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
  55. msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
  56. msprobe/docs/10.accuracy_compare_PyTorch.md +114 -50
  57. msprobe/docs/11.accuracy_compare_MindSpore.md +340 -48
  58. msprobe/docs/12.overflow_check_PyTorch.md +2 -2
  59. msprobe/docs/13.overflow_check_MindSpore.md +6 -6
  60. msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
  61. msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
  62. msprobe/docs/17.grad_probe.md +5 -6
  63. msprobe/docs/19.monitor.md +561 -0
  64. msprobe/docs/20.monitor_performance_baseline.md +52 -0
  65. msprobe/docs/21.visualization_PyTorch.md +466 -0
  66. msprobe/docs/22.visualization_MindSpore.md +481 -0
  67. msprobe/docs/23.generate_operator_PyTorch.md +107 -0
  68. msprobe/docs/24.code_mapping_Mindspore.md +28 -0
  69. msprobe/docs/25.tool_function_introduction.md +29 -0
  70. msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
  71. msprobe/docs/27.dump_json_instruction.md +521 -0
  72. msprobe/docs/FAQ.md +29 -2
  73. msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
  74. msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
  75. msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +211 -0
  76. msprobe/docs/img/compare_result.png +0 -0
  77. msprobe/docs/img/merge_result.png +0 -0
  78. msprobe/docs/img/monitor/cpu_info.png +0 -0
  79. msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
  80. msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
  81. msprobe/docs/img/visualization/tensorboard_1.png +0 -0
  82. msprobe/docs/img/visualization/tensorboard_2.png +0 -0
  83. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  84. msprobe/docs/img/visualization/vis_browser_2.png +0 -0
  85. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  86. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  87. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  88. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  89. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  90. msprobe/docs/visualization/GPTModel.png +0 -0
  91. msprobe/docs/visualization/ParallelMLP.png +0 -0
  92. msprobe/docs/visualization/layer_mapping_example.md +132 -0
  93. msprobe/docs/visualization/mapping.png +0 -0
  94. msprobe/docs/visualization/mapping1.png +0 -0
  95. msprobe/docs/visualization/module_name.png +0 -0
  96. msprobe/docs/visualization/module_name1.png +0 -0
  97. msprobe/docs/visualization/no_mapping.png +0 -0
  98. msprobe/docs/visualization/no_mapping1.png +0 -0
  99. msprobe/docs/visualization/no_mapping_analyze.png +0 -0
  100. msprobe/docs/visualization/top_layer.png +0 -0
  101. msprobe/mindspore/__init__.py +25 -0
  102. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -151
  103. msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
  104. msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
  105. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
  106. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
  107. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +64 -1
  108. msprobe/mindspore/api_accuracy_checker/compute_element.py +64 -31
  109. msprobe/mindspore/api_accuracy_checker/data_manager.py +301 -0
  110. msprobe/mindspore/api_accuracy_checker/main.py +28 -3
  111. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +212 -0
  112. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +60 -0
  113. msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
  114. msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
  115. msprobe/mindspore/cell_processor.py +33 -12
  116. msprobe/mindspore/code_mapping/bind.py +264 -0
  117. msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
  118. msprobe/mindspore/code_mapping/graph.py +49 -0
  119. msprobe/mindspore/code_mapping/graph_parser.py +226 -0
  120. msprobe/mindspore/code_mapping/main.py +24 -0
  121. msprobe/mindspore/code_mapping/processor.py +34 -0
  122. msprobe/mindspore/common/const.py +35 -13
  123. msprobe/mindspore/common/log.py +5 -9
  124. msprobe/mindspore/common/utils.py +88 -4
  125. msprobe/mindspore/compare/distributed_compare.py +22 -24
  126. msprobe/mindspore/compare/ms_compare.py +333 -268
  127. msprobe/mindspore/compare/ms_graph_compare.py +95 -52
  128. msprobe/mindspore/debugger/debugger_config.py +7 -1
  129. msprobe/mindspore/debugger/precision_debugger.py +87 -12
  130. msprobe/mindspore/dump/dump_tool_factory.py +3 -1
  131. msprobe/mindspore/dump/hook_cell/api_registry.py +95 -18
  132. msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
  133. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +45 -30
  134. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +36 -1
  135. msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
  136. msprobe/mindspore/dump/jit_dump.py +17 -5
  137. msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
  138. msprobe/mindspore/dump/kernel_graph_dump.py +9 -4
  139. msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
  140. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
  141. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
  142. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +156 -41
  143. msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
  144. msprobe/mindspore/free_benchmark/common/utils.py +19 -4
  145. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
  146. msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
  147. msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
  148. msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
  149. msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
  150. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
  151. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
  152. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
  153. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
  154. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
  155. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
  156. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
  157. msprobe/mindspore/grad_probe/global_context.py +28 -8
  158. msprobe/mindspore/grad_probe/grad_analyzer.py +50 -24
  159. msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
  160. msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
  161. msprobe/mindspore/grad_probe/hook.py +35 -12
  162. msprobe/mindspore/grad_probe/utils.py +18 -5
  163. msprobe/mindspore/mindtorch/__init__.py +18 -0
  164. msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
  165. msprobe/mindspore/ms_config.py +27 -16
  166. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +9 -4
  167. msprobe/mindspore/runtime.py +15 -0
  168. msprobe/mindspore/service.py +285 -113
  169. msprobe/mindspore/task_handler_factory.py +15 -0
  170. msprobe/msprobe.py +48 -10
  171. msprobe/pytorch/__init__.py +8 -6
  172. msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
  173. msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
  174. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
  175. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +103 -271
  176. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
  177. msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
  178. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
  179. msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
  180. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
  181. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
  182. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +478 -0
  183. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
  184. msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
  185. msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
  186. msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
  187. msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
  188. msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
  189. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
  190. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
  191. msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
  192. msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
  193. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +63 -2
  194. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +21 -15
  195. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +54 -22
  196. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +140 -71
  197. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +49 -8
  198. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
  199. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
  200. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
  201. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
  202. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
  203. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
  204. msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
  205. msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
  206. msprobe/pytorch/bench_functions/npu_fusion_attention.py +142 -16
  207. msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
  208. msprobe/pytorch/bench_functions/swiglu.py +10 -2
  209. msprobe/pytorch/common/parse_json.py +7 -6
  210. msprobe/pytorch/common/utils.py +101 -7
  211. msprobe/pytorch/compare/distributed_compare.py +17 -30
  212. msprobe/pytorch/compare/pt_compare.py +44 -22
  213. msprobe/pytorch/debugger/debugger_config.py +46 -27
  214. msprobe/pytorch/debugger/precision_debugger.py +42 -12
  215. msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
  216. msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
  217. msprobe/pytorch/{module_processer.py → dump/module_dump/module_processer.py} +81 -10
  218. msprobe/pytorch/free_benchmark/common/constant.py +15 -0
  219. msprobe/pytorch/free_benchmark/common/counter.py +15 -0
  220. msprobe/pytorch/free_benchmark/common/enums.py +15 -0
  221. msprobe/pytorch/free_benchmark/common/params.py +10 -2
  222. msprobe/pytorch/free_benchmark/common/utils.py +29 -4
  223. msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -5
  224. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
  225. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
  226. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
  227. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
  228. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
  229. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +41 -47
  230. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
  231. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
  232. msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
  233. msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
  234. msprobe/pytorch/hook_module/__init__.py +1 -1
  235. msprobe/pytorch/hook_module/hook_module.py +14 -11
  236. msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
  237. msprobe/pytorch/hook_module/support_wrap_ops.yaml +35 -0
  238. msprobe/pytorch/hook_module/wrap_distributed.py +6 -8
  239. msprobe/pytorch/hook_module/wrap_functional.py +0 -38
  240. msprobe/pytorch/monitor/__init__.py +0 -0
  241. msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
  242. msprobe/pytorch/monitor/anomaly_detect.py +425 -0
  243. msprobe/pytorch/monitor/csv2tb.py +166 -0
  244. msprobe/pytorch/monitor/distributed/__init__.py +0 -0
  245. msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
  246. msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
  247. msprobe/pytorch/monitor/distributed/wrap_distributed.py +283 -0
  248. msprobe/pytorch/monitor/features.py +108 -0
  249. msprobe/pytorch/monitor/module_hook.py +1076 -0
  250. msprobe/pytorch/monitor/module_metric.py +172 -0
  251. msprobe/pytorch/monitor/module_spec_verifier.py +95 -0
  252. msprobe/pytorch/monitor/optimizer_collect.py +333 -0
  253. msprobe/pytorch/monitor/unittest/__init__.py +0 -0
  254. msprobe/pytorch/monitor/unittest/test_monitor.py +160 -0
  255. msprobe/pytorch/monitor/utils.py +321 -0
  256. msprobe/pytorch/monitor/visualizer.py +59 -0
  257. msprobe/pytorch/online_dispatch/__init__.py +2 -3
  258. msprobe/pytorch/online_dispatch/compare.py +29 -38
  259. msprobe/pytorch/online_dispatch/dispatch.py +58 -27
  260. msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
  261. msprobe/pytorch/online_dispatch/single_compare.py +53 -32
  262. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
  263. msprobe/pytorch/online_dispatch/utils.py +49 -21
  264. msprobe/pytorch/parse_tool/lib/compare.py +21 -27
  265. msprobe/pytorch/parse_tool/lib/config.py +6 -8
  266. msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
  267. msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
  268. msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
  269. msprobe/pytorch/parse_tool/lib/parse_tool.py +12 -12
  270. msprobe/pytorch/parse_tool/lib/utils.py +33 -53
  271. msprobe/pytorch/parse_tool/lib/visualization.py +11 -10
  272. msprobe/pytorch/pt_config.py +31 -8
  273. msprobe/pytorch/service.py +188 -108
  274. msprobe/visualization/__init__.py +14 -0
  275. msprobe/visualization/builder/__init__.py +14 -0
  276. msprobe/visualization/builder/graph_builder.py +222 -0
  277. msprobe/visualization/builder/msprobe_adapter.py +227 -0
  278. msprobe/visualization/compare/__init__.py +14 -0
  279. msprobe/visualization/compare/graph_comparator.py +180 -0
  280. msprobe/visualization/compare/mode_adapter.py +197 -0
  281. msprobe/visualization/graph/__init__.py +14 -0
  282. msprobe/visualization/graph/base_node.py +119 -0
  283. msprobe/visualization/graph/distributed_analyzer.py +318 -0
  284. msprobe/visualization/graph/graph.py +209 -0
  285. msprobe/visualization/graph/node_colors.py +95 -0
  286. msprobe/visualization/graph/node_op.py +39 -0
  287. msprobe/visualization/graph_service.py +288 -0
  288. msprobe/visualization/utils.py +217 -0
  289. mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
  290. msprobe/docs/04.acl_config_examples.md +0 -78
  291. msprobe/mindspore/compare/layer_mapping.py +0 -146
  292. msprobe/mindspore/compare/modify_mapping.py +0 -107
  293. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
  294. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
  295. msprobe/pytorch/functional/module_dump.py +0 -84
  296. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/LICENSE +0 -0
  297. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/top_level.txt +0 -0
  298. /msprobe/mindspore/{free_benchmark/decorator → code_mapping}/__init__.py +0 -0
  299. /msprobe/pytorch/{functional → dump/module_dump}/__init__.py +0 -0

msprobe/core/data_dump/data_collector.py

@@ -13,9 +13,10 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ import atexit
  import os

- from msprobe.core.data_dump.scope import build_scope, ListScope
+ from msprobe.core.data_dump.scope import ScopeFactory
  from msprobe.core.data_dump.json_writer import DataWriter
  from msprobe.core.common.log import logger
  from msprobe.core.common.const import Const
@@ -27,7 +28,6 @@ def build_data_collector(config):


  class DataCollector:
-     multi_output_apis = ["_sort_", "npu_flash_attention"]
      tasks_need_tensor_data = [Const.OVERFLOW_CHECK, Const.TENSOR, Const.FREE_BENCHMARK]
      level_without_construct = [Const.LEVEL_L1, Const.LEVEL_L2]

@@ -37,13 +37,10 @@ class DataCollector:
          self.data_processor = DataProcessorFactory.create_processor(self.config, self.data_writer)
          self.module_processor = DataProcessorFactory.get_module_processor(self.config.framework)
          self.module_count = {}
-         if self.config.task == Const.FREE_BENCHMARK:
-             self.scope = build_scope(ListScope, self.config.scope, self.config.list)
-         else:
-             self.scope = build_scope(None, self.config.scope, self.config.list)
-
-     def __del__(self):
-         self.write_json()
+         self.scope = ScopeFactory(self.config).build_scope()
+         self.backward_module_names = {}
+         self.optimizer_status = ""
+         atexit.register(self.write_json)

      @property
      def dump_data_dir(self):
@@ -57,10 +54,6 @@ class DataCollector:
      def check_scope_and_pid(scope, name, pid):
          return (not scope or scope.check(name)) and pid == os.getpid()

-     @staticmethod
-     def is_inplace(module):
-         return getattr(module, "op_is_inplace", False)
-
      def if_return_forward_new_output(self):
          return self.data_processor.if_return_forward_new_output()

@@ -84,36 +77,54 @@ class DataCollector:
          logger.debug(msg)
          self.data_writer.update_data(data_info)

-     def pre_forward_data_collect(self, name, module, pid, module_input_output):
-         backward_name = name.replace(Const.FORWARD, Const.BACKWARD)
-         if self.check_scope_and_pid(self.scope, backward_name, pid):
-             self.data_processor.analyze_pre_forward(backward_name, module, module_input_output)
-         if not self.is_inplace(module) or not self.check_scope_and_pid(self.scope, name, pid):
+     def forward_input_data_collect(self, name, module, pid, module_input_output):
+         if self.config.task == Const.FREE_BENCHMARK:
+             backward_name = name.replace(Const.FORWARD, Const.BACKWARD)
+             if self.check_scope_and_pid(self.scope, backward_name, pid):
+                 self.data_processor.analyze_forward_input(backward_name, module, module_input_output)
+             return
+
+         if not self.check_scope_and_pid(self.scope, name, pid):
+             return
+
+         data_info = self.data_processor.analyze_forward_input(name, module, module_input_output)
+         if self.config.level == Const.LEVEL_L2:
              return
-         logger.info(f"API {name} is inplace.")
-         data_info = self.data_processor.analyze_pre_forward_inplace(name, module_input_output)
          self.handle_data(name, data_info, flush=self.data_processor.is_terminated)

-     def forward_data_collect(self, name, module, pid, module_input_output):
+     def forward_output_data_collect(self, name, module, pid, module_input_output):
          self.update_construct(name)
          if not self.check_scope_and_pid(self.scope, name, pid):
              return

-         if not self.is_inplace(module):
-             data_info = self.data_processor.analyze_forward(name, module, module_input_output)
-         else:
-             data_info = self.data_processor.analyze_forward_inplace(name, module_input_output)
-         if self.config.level == "L2":
+         data_info = self.data_processor.analyze_forward_output(name, module, module_input_output)
+         if self.config.level == Const.LEVEL_L2:
              return
          self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
          self.handle_data(name, data_info, flush=self.data_processor.is_terminated)

+     def forward_data_collect(self, name, module, pid, module_input_output):
+         self.update_construct(name)
+         if not self.check_scope_and_pid(self.scope, name, pid):
+             return
+
+         data_info = self.data_processor.analyze_forward(name, module, module_input_output)
+         self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
+         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
+
      def backward_data_collect(self, name, module, pid, module_input_output):
          self.update_construct(name)
          if not self.check_scope_and_pid(self.scope, name, pid):
              return

          data_info = self.data_processor.analyze_backward(name, module, module_input_output)
+         if self.config.level == Const.LEVEL_L2:
+             return
+         # Get the name of the module that ran this backward pass
+         if data_info and name.split(Const.SEP)[0] in Const.MODULE_PREFIX:
+             module_name = name.rsplit(Const.SEP, 2)[0]
+             # Record the module name so gradient collection can decide whether its gradients need to be collected
+             self.backward_module_names[module_name] = True
          self.handle_data(name, data_info, flush=self.data_processor.is_terminated)

      def backward_input_data_collect(self, name, module, pid, module_input_output):
@@ -134,12 +145,17 @@

      def update_construct(self, name):
          if self.config.level not in DataCollector.level_without_construct:
-             self.data_writer.update_construct({name: self.module_processor.api_parent_node})
+             if self.optimizer_status in [Const.OPTIMIZER, Const.CLIP_GRAD]:
+                 self.data_writer.update_construct({name: self.optimizer_status})
+             else:
+                 self.data_writer.update_construct({name: self.module_processor.api_parent_node})
              self.data_writer.update_construct(self.module_processor.module_node)

      def handle_data(self, name, data_info, flush=False):
          if data_info:
              self.update_data(name, data_info)
+         if self.config.async_dump:
+             return
          if not flush:
              self.data_writer.flush_data_periodically()
          else:
@@ -147,7 +163,23 @@

      def update_dump_paths(self, *args):
          self.data_writer.update_dump_paths(*args)
-         self.data_writer.initialize_json_file(task=self.config.task, level=self.config.level)
+
+     def initialize_json_file(self, framework=Const.UNKNOWN_FRAMEWORK):
+         self.data_writer.initialize_json_file(task=self.config.task, level=self.config.level, framework=framework)

      def update_iter(self, current_iter):
          self.data_processor.update_iter(current_iter)
+
+     def params_data_collect(self, name, param_name, pid, data):
+         grad_name = name + Const.SEP + Const.PARAMS_GRAD
+         # Check the scope and pid, and whether this name has already run a backward pass
+         if not self.check_scope_and_pid(self.scope, name, pid) and not self.backward_module_names.get(name):
+             # If no backward pass was run, clear the placeholder grad data written earlier
+             if self.data_writer.cache_data.get("data"):
+                 self.data_writer.cache_data.get("data").pop(grad_name, None)
+             return
+         data_info = self.data_processor.analyze_params(grad_name, param_name, data)
+         self.handle_data(grad_name, data_info, flush=self.data_processor.is_terminated)
+
+     def fill_stack_tensor_data(self):
+         self.data_writer.fill_stack_tensor_data()
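
The changes to DataCollector above replace the `__del__`-based flush with `atexit.register(self.write_json)`, so cached dump data is written exactly once at interpreter shutdown instead of depending on when (or whether) the collector is garbage-collected. A minimal standalone sketch of that pattern; the `Writer` class and its fields are illustrative only, not msprobe API:

    import atexit


    class Writer:
        """Stand-in for the dump writer: flush at exit rather than in __del__."""

        def __init__(self):
            self.cache = []
            # atexit callbacks run at normal interpreter exit even if the last
            # reference to this object is never dropped explicitly.
            atexit.register(self.write_json)

        def write_json(self):
            print(f"flushing {len(self.cache)} cached records")


    writer = Writer()
    writer.cache.append({"Tensor.add.0.forward": {}})
    # At exit, write_json() runs once; a __del__ hook might never run at all.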

msprobe/core/data_dump/data_processor/base.py

@@ -1,7 +1,7 @@
  # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
  # All rights reserved.
  #
- # Licensed under the Apache License, Version 2.0 (the "License");
+ # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
@@ -15,10 +15,11 @@

  import inspect
  import os
- from dataclasses import dataclass
+ from dataclasses import dataclass, is_dataclass
  from typing import Tuple, Dict, Optional, Any

  import numpy as np
+
  from msprobe.core.common.const import Const
  from msprobe.core.common.log import logger
  from msprobe.core.common.utils import convert_tuple, CompareException
@@ -38,9 +39,8 @@ class ModuleForwardInputsOutputs:
      def output_tuple(self):
          return convert_tuple(self.output)

-     def concat_args_and_kwargs(self):
-         args = self.args + tuple(self.kwargs.values())
-         return args
+     def update_output_with_args_and_kwargs(self):
+         self.output = self.args + tuple(self.kwargs.values())


  @dataclass
@@ -76,11 +76,12 @@ class ModuleBackwardOutputs:


  class TensorStatInfo:
-     def __init__(self, max_val=None, min_val=None, mean_val=None, norm_val=None):
+     def __init__(self, max_val=None, min_val=None, mean_val=None, norm_val=None, stack_tensor_stat=None):
          self.max = max_val
          self.min = min_val
          self.mean = mean_val
          self.norm = norm_val
+         self.stack_tensor_stat = stack_tensor_stat


  class BaseDataProcessor:
@@ -101,6 +102,9 @@ class BaseDataProcessor:
          self.current_iter = 0
          self._return_forward_new_output = False
          self._forward_new_output = None
+         self.save_name = None
+         if hasattr(config, "data_mode"):
+             self.allowed_data_mode = self._get_allowed_data_mode(config.data_mode)

      @property
      def data_path(self):
@@ -182,6 +186,18 @@
      def _analyze_numpy(value, numpy_type):
          return {"type": numpy_type, "value": value}

+     @staticmethod
+     def _get_allowed_data_mode(data_mode):
+         if Const.ALL in data_mode:
+             allowed_data_mode = [Const.FORWARD, Const.BACKWARD, Const.INPUT, Const.OUTPUT]
+         else:
+             allowed_data_mode = list(set(data_mode))
+         if Const.FORWARD not in allowed_data_mode and Const.BACKWARD not in allowed_data_mode:
+             allowed_data_mode += [Const.FORWARD, Const.BACKWARD]
+         if Const.INPUT not in allowed_data_mode and Const.OUTPUT not in allowed_data_mode:
+             allowed_data_mode += [Const.INPUT, Const.OUTPUT]
+         return allowed_data_mode
+
      @classmethod
      def get_special_types(cls):
          return cls.special_type
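
The new `_get_allowed_data_mode` helper expands a partial `data_mode` setting so that naming only a direction (forward/backward) or only a side (input/output) still produces a complete filter, and the rewritten `is_dump_for_data_mode` then requires both the direction and the side of the current call to be present. A rough standalone sketch with literal strings standing in for the `Const` members (the exact string values are an assumption here):

    def get_allowed_data_mode(data_mode):
        # Mirrors the expansion logic shown in the hunk above, with plain strings.
        if "all" in data_mode:
            return ["forward", "backward", "input", "output"]
        allowed = list(set(data_mode))
        if "forward" not in allowed and "backward" not in allowed:
            allowed += ["forward", "backward"]   # only a side was given; keep both directions
        if "input" not in allowed and "output" not in allowed:
            allowed += ["input", "output"]       # only a direction was given; keep both sides
        return allowed


    print(get_allowed_data_mode(["input"]))     # ['input', 'forward', 'backward']
    print(get_allowed_data_mode(["backward"]))  # ['backward', 'input', 'output']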

@@ -194,25 +210,42 @@
          if isinstance(args, cls.get_special_types()):
              arg_transform = transform(args, cls._recursive_key_stack)
              return arg_transform
+         elif isinstance(args, tuple) and hasattr(args, '_fields'):
+             # namedtuple to dict
+             args_dict = {field: getattr(args, field) for field in args._fields}
+             return cls.apply_transform_dict(args_dict, transform, depth)
+         elif is_dataclass(args):
+             # dataclass to dict
+             args_dict = {field: getattr(args, field) for field in args.__dataclass_fields__}
+             return cls.apply_transform_dict(args_dict, transform, depth)
          elif isinstance(args, (list, tuple)):
-             result_list = []
-             for i, arg in enumerate(args):
-                 cls._recursive_key_stack.append(str(i))
-                 result_list.append(cls.recursive_apply_transform(arg, transform, depth=depth + 1))
-                 cls._recursive_key_stack.pop()
+             result_list = cls.apply_transform_list(args, transform, depth)
              return type(args)(result_list)
          elif isinstance(args, dict):
-             result_dict = {}
-             for k, arg in args.items():
-                 cls._recursive_key_stack.append(str(k))
-                 result_dict[k] = cls.recursive_apply_transform(arg, transform, depth=depth + 1)
-                 cls._recursive_key_stack.pop()
-             return result_dict
+             return cls.apply_transform_dict(args, transform, depth)
          elif args is not None:
-             logger.warning(f"Data type {type(args)} is not supported.")
+             logger.debug(f"Data type {type(args)} is not supported.")
              return None
          else:
              return None
+
+     @classmethod
+     def apply_transform_dict(cls, args, transform, depth):
+         result_dict = {}
+         for k, arg in args.items():
+             cls._recursive_key_stack.append(str(k))
+             result_dict[k] = cls.recursive_apply_transform(arg, transform, depth=depth + 1)
+             cls._recursive_key_stack.pop()
+         return result_dict
+
+     @classmethod
+     def apply_transform_list(cls, args, transform, depth):
+         result_list = []
+         for i, arg in enumerate(args):
+             cls._recursive_key_stack.append(str(i))
+             result_list.append(cls.recursive_apply_transform(arg, transform, depth=depth + 1))
+             cls._recursive_key_stack.pop()
+         return result_list

      def if_return_forward_new_output(self):
          return self._return_forward_new_output
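
The new branches in `recursive_apply_transform` above let the traversal descend into namedtuples and dataclasses by converting them to dicts before recursing. A self-contained sketch of that detection order; `to_plain` is a hypothetical helper that only flattens, whereas the real method applies a transform and maintains a key stack:

    from collections import namedtuple
    from dataclasses import dataclass, is_dataclass

    Point = namedtuple("Point", ["x", "y"])


    @dataclass
    class Box:
        lo: int
        hi: int


    def to_plain(obj):
        # Same order as the hunk above: namedtuple -> dict, dataclass -> dict,
        # then list/tuple and dict recursion; leaves are returned unchanged.
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            return {f: to_plain(getattr(obj, f)) for f in obj._fields}
        if is_dataclass(obj):
            return {f: to_plain(getattr(obj, f)) for f in obj.__dataclass_fields__}
        if isinstance(obj, (list, tuple)):
            return type(obj)(to_plain(v) for v in obj)
        if isinstance(obj, dict):
            return {k: to_plain(v) for k, v in obj.items()}
        return obj


    print(to_plain((Point(1, 2), Box(lo=0, hi=9))))
    # ({'x': 1, 'y': 2}, {'lo': 0, 'hi': 9})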

@@ -239,17 +272,12 @@
          Return:
              bool: True if the parameters are in data_mode or data_mode is all, False otherwise.
          """
-         return (Const.ALL in self.config.data_mode or
-                 forward_backward in self.config.data_mode or
-                 input_output in self.config.data_mode)
-
-     def analyze_pre_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
-         pass
+         return forward_backward in self.allowed_data_mode and input_output in self.allowed_data_mode

      def analyze_element(self, element):
          return self.recursive_apply_transform(element, self.analyze_single_element)

-     def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
+     def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
          api_info_struct = {}
          # check whether data_mode contains forward or input
          if self.is_dump_for_data_mode(Const.FORWARD, Const.INPUT):
@@ -261,16 +289,22 @@
              kwargs_info_list = self.analyze_element(module_input_output.kwargs)
              api_info_struct[name][Const.INPUT_KWARGS] = kwargs_info_list

-         # check whether data_mode contains forward or output
+         return api_info_struct
+
+     def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
+         api_info_struct = {}
+         # check whether data_mode contains forward or input
          if self.is_dump_for_data_mode(Const.FORWARD, Const.OUTPUT):
-             api_info_struct[name] = api_info_struct.get(name, {})
+             api_info_struct[name] = {}
              self.api_data_category = Const.OUTPUT
              output_info_list = self.analyze_element(module_input_output.output_tuple)
              api_info_struct[name][Const.OUTPUT] = output_info_list
+
          return api_info_struct

-     def analyze_pre_forward_inplace(self, name, module_input_output: ModuleForwardInputsOutputs):
+     def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
          api_info_struct = {}
+         # check whether data_mode contains forward or input
          if self.is_dump_for_data_mode(Const.FORWARD, Const.INPUT):
              api_info_struct[name] = {}
              self.api_data_category = Const.INPUT
@@ -279,16 +313,18 @@
              self.api_data_category = Const.KWARGS
              kwargs_info_list = self.analyze_element(module_input_output.kwargs)
              api_info_struct[name][Const.INPUT_KWARGS] = kwargs_info_list
-         return api_info_struct

-     def analyze_forward_inplace(self, name, module_input_output: ModuleForwardInputsOutputs):
-         concat_args = module_input_output.concat_args_and_kwargs()
-         api_info_struct = {}
+         # check whether data_mode contains forward or output
          if self.is_dump_for_data_mode(Const.FORWARD, Const.OUTPUT):
-             api_info_struct[name] = {}
+             api_info_struct[name] = api_info_struct.get(name, {})
              self.api_data_category = Const.OUTPUT
-             output_info_list = self.analyze_element(concat_args)
+             output_info_list = self.analyze_element(module_input_output.output_tuple)
              api_info_struct[name][Const.OUTPUT] = output_info_list
+
+         if name in api_info_struct and hasattr(module_input_output, Const.PARAMS):
+             self.api_data_category = Const.PARAMS
+             api_info_struct[name][Const.PARAMS] = self.analyze_element(getattr(module_input_output, Const.PARAMS))
+
          return api_info_struct

      def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
@@ -329,9 +365,21 @@
          api_info_struct[name][Const.OUTPUT] = output_info_list
          return api_info_struct

+     def analyze_params(self, name, param_name, grad):
+         api_info_struct = {}
+         self.save_name = name + Const.SEP + param_name
+         data_info = self.analyze_element(grad)
+         grad_info_dict = {param_name: [data_info]}
+         api_info_struct[name] = grad_info_dict
+         return api_info_struct
+
      def get_save_file_path(self, suffix):
          file_format = Const.PT_SUFFIX if self.config.framework == Const.PT_FRAMEWORK else Const.NUMPY_SUFFIX
-         dump_data_name = (self.current_api_or_module_name + Const.SEP + self.api_data_category + Const.SEP +
-                           suffix + file_format)
+         if self.save_name is not None:
+             dump_data_name = (self.save_name + file_format)
+             self.save_name = None
+         else:
+             dump_data_name = (self.current_api_or_module_name + Const.SEP + self.api_data_category + Const.SEP +
+                               suffix + file_format)
          file_path = os.path.join(self.data_writer.dump_tensor_data_dir, dump_data_name)
          return dump_data_name, file_path

msprobe/core/data_dump/data_processor/factory.py

@@ -1,4 +1,4 @@
- # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
  # All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -56,7 +56,7 @@ class DataProcessorFactory:
              FreeBenchmarkDataProcessor as PytorchFreeBenchmarkDataProcessor,
              KernelDumpDataProcessor as PytorchKernelDumpDataProcessor
          )
-         from msprobe.pytorch.module_processer import ModuleProcesser
+         from msprobe.pytorch.dump.module_dump.module_processer import ModuleProcesser
          cls.register_processor(Const.PT_FRAMEWORK, Const.STATISTICS, PytorchStatisticsDataProcessor)
          cls.register_processor(Const.PT_FRAMEWORK, Const.TENSOR, PytorchTensorDataProcessor)
          cls.register_processor(Const.PT_FRAMEWORK, Const.OVERFLOW_CHECK, PytorchOverflowCheckDataProcessor)
@@ -67,10 +67,12 @@ class DataProcessorFactory:
          from msprobe.core.data_dump.data_processor.mindspore_processor import (
              StatisticsDataProcessor as MindsporeStatisticsDataProcessor,
              TensorDataProcessor as MindsporeTensorDataProcessor,
-             OverflowCheckDataProcessor as MindsporeOverflowCheckDataProcessor
+             OverflowCheckDataProcessor as MindsporeOverflowCheckDataProcessor,
+             KernelDumpDataProcessor as MindsporeKernelDumpDataProcessor
          )
          from msprobe.mindspore.cell_processor import CellProcessor
          cls.register_processor(Const.MS_FRAMEWORK, Const.STATISTICS, MindsporeStatisticsDataProcessor)
          cls.register_processor(Const.MS_FRAMEWORK, Const.TENSOR, MindsporeTensorDataProcessor)
          cls.register_processor(Const.MS_FRAMEWORK, Const.OVERFLOW_CHECK, MindsporeOverflowCheckDataProcessor)
+         cls.register_processor(Const.MS_FRAMEWORK, Const.KERNEL_DUMP, MindsporeKernelDumpDataProcessor)
          cls.register_module_processor(Const.MS_FRAMEWORK, CellProcessor)

msprobe/core/data_dump/data_processor/mindspore_processor.py

@@ -1,4 +1,4 @@
- # Copyright 2024 Huawei Technologies Co., Ltd
+ # Copyright 2024-2025 Huawei Technologies Co., Ltd
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
  import zlib

  import mindspore as ms
- from mindspore import mint, ops
+ from mindspore import mint, ops, hal
  from mindspore._c_expression.typing import Number
  import numpy as np

@@ -28,6 +28,12 @@ from msprobe.mindspore.common.utils import convert_bf16_to_fp32, save_tensor_as_
  from msprobe.mindspore.common.log import logger
  from msprobe.mindspore.dump.hook_cell.api_registry import api_register

+ has_adump = True
+ try:
+     from msprobe.lib import _msprobe_c
+ except ImportError:
+     has_adump = False
+

  class MindsporeDataProcessor(BaseDataProcessor):
      mindspore_special_type = tuple([ms.Tensor, Number])
@@ -37,6 +43,7 @@ class MindsporeDataProcessor(BaseDataProcessor):
          self.mindspore_object_key = {
              "dtype": self.analyze_dtype_in_kwargs
          }
+         self._async_dump_cache = {}

      @staticmethod
      def get_md5_for_tensor(x):
@@ -49,15 +56,10 @@ class MindsporeDataProcessor(BaseDataProcessor):
      def analyze_dtype_in_kwargs(element):
          return {"type": "mindspore.dtype", "value": str(element)}

-     @classmethod
-     def get_special_types(cls):
-         return super().get_special_types() + cls.mindspore_special_type
-
-     def get_stat_info(self, data):
+     @staticmethod
+     def get_stat_info_sync(data):
          tensor_stat = TensorStatInfo()
-         if data.numel() == 0:
-             return tensor_stat
-         elif data.dtype == ms.bool_:
+         if data.dtype == ms.bool_:
              data_np = data.asnumpy()
              tensor_stat.max = np.max(data_np).item()
              tensor_stat.min = np.min(data_np).item()
@@ -70,7 +72,7 @@ class MindsporeDataProcessor(BaseDataProcessor):
              tensor_stat.mean = np.mean(data_abs).item()
              tensor_stat.norm = np.linalg.norm(data_abs).item()
          else:
-             if not ops.is_floating_point(data):
+             if not ops.is_floating_point(data) or data.dtype == ms.float64:
                  data = data.to(ms.float32)
              api_register.norm_inner_op_set_ori_func()
              get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max)
@@ -87,6 +89,47 @@ class MindsporeDataProcessor(BaseDataProcessor):
              api_register.norm_inner_op_set_hook_func()
          return tensor_stat

+     @staticmethod
+     def get_stat_info_async(data):
+         tensor_stat = TensorStatInfo()
+         stack_method = api_register.functional_ori_attr.get("stack", ms.ops.stack)
+         if data.dtype == ms.complex64 or data.dtype == ms.complex128:
+             logger.warning("Async dump do not support complex data!")
+             return tensor_stat
+         elif data.dtype == ms.bool_:
+             tensor_stat.stack_tensor_stat = (["Max", "Min"], stack_method([data.any(), data.all()]))
+         elif not data.shape:
+             tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method([data, data, data, data]))
+         else:
+             if not ops.is_floating_point(data) or data.dtype == ms.float64:
+                 data = data.to(ms.float32)
+             api_register.norm_inner_op_set_ori_func()
+             get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max)
+             get_min_value = api_register.mint_ops_ori_attr.get("min", mint.min)
+             get_mean_value = api_register.mint_ops_ori_attr.get("mean", mint.mean)
+             if hasattr(mint, "norm"):
+                 get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm)
+             else:
+                 get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm)
+             tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method(
+                 [get_max_value(data), get_min_value(data), get_mean_value(data), get_norm_value(data)]))
+             api_register.norm_inner_op_set_hook_func()
+         return tensor_stat
+
+     @classmethod
+     def get_special_types(cls):
+         return super().get_special_types() + cls.mindspore_special_type
+
+     def get_stat_info(self, data):
+         tensor_stat = TensorStatInfo()
+         if data.numel() == 0:
+             return tensor_stat
+         else:
+             if self.config.async_dump:
+                 return MindsporeDataProcessor.get_stat_info_async(data)
+             else:
+                 return MindsporeDataProcessor.get_stat_info_sync(data)
+

      def analyze_single_element(self, element, suffix_stack):
          if suffix_stack and suffix_stack[-1] in self.mindspore_object_key:
              return self.mindspore_object_key[suffix_stack[-1]](element)
@@ -107,13 +150,17 @@ class MindsporeDataProcessor(BaseDataProcessor):
          tensor_json = {
              'type': 'mindspore.Tensor',
              'dtype': str(tensor.dtype),
-             'shape': tensor.shape,
-             'Max': self.transfer_type(tensor_stat.max),
-             'Min': self.transfer_type(tensor_stat.min),
-             'Mean': self.transfer_type(tensor_stat.mean),
-             'Norm': self.transfer_type(tensor_stat.norm),
+             'shape': tensor.shape
          }
-         if self.config.summary_mode == Const.MD5:
+
+         if tensor_stat.stack_tensor_stat is None:
+             tensor_json.update({'Max': self.transfer_type(tensor_stat.max)})
+             tensor_json.update({'Min': self.transfer_type(tensor_stat.min)})
+             tensor_json.update({'Mean': self.transfer_type(tensor_stat.mean)})
+             tensor_json.update({'Norm': self.transfer_type(tensor_stat.norm)})
+         else:
+             tensor_json.update({'tensor_stat': tensor_stat.stack_tensor_stat})
+         if self.config.summary_mode == Const.MD5 and not self.config.async_dump:
              tensor_md5 = self.get_md5_for_tensor(tensor)
              tensor_json.update({Const.MD5: tensor_md5})
          return tensor_json
@@ -124,11 +171,19 @@ class StatisticsDataProcessor(MindsporeDataProcessor):


  class TensorDataProcessor(MindsporeDataProcessor):
+     def dump_async_data(self):
+         for file_path, tensor in self._async_dump_cache.items():
+             save_tensor_as_npy(tensor, file_path)
+         self._async_dump_cache.clear()
+
      def _analyze_tensor(self, tensor, suffix):
          dump_data_name, file_path = self.get_save_file_path(suffix)
          single_arg = super()._analyze_tensor(tensor, suffix)
          single_arg.update({"data_name": dump_data_name})
-         save_tensor_as_npy(tensor, file_path)
+         if self.config.async_dump:
+             self._async_dump_cache[file_path] = tensor.copy()
+         else:
+             save_tensor_as_npy(tensor, file_path)
          return single_arg


@@ -138,6 +193,7 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
      def __init__(self, config, data_writer):
          super().__init__(config, data_writer)
          self.has_overflow = False
+         self.cached_api_info = {}
          self.cached_tensors_and_file_paths = {}
          self.real_overflow_nums = 0
          self.overflow_nums = config.overflow_nums
@@ -150,6 +206,20 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
                  return True
          return False

+     def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
+         self.has_overflow = False
+         self.cached_api_info = super().analyze_forward_input(name, module, module_input_output)
+         return None
+
+     def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
+         api_info_struct = super().analyze_forward_output(name, module, module_input_output)
+         if name in self.cached_api_info and name in api_info_struct:
+             self.cached_api_info[name].update(api_info_struct[name])
+         elif name in api_info_struct:
+             self.cached_api_info = api_info_struct
+         self.maybe_save_overflow_data()
+         return self.cached_api_info if self.has_overflow else None
+
      def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
          self.has_overflow = False
          api_info_struct = super().analyze_forward(name, module, module_input_output)
@@ -161,6 +231,12 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
          api_info_struct = super().analyze_backward(name, module, module_input_output)
          self.maybe_save_overflow_data()
          return api_info_struct if self.has_overflow else None
+
+     def analyze_params(self, name, param_name, grad):
+         self.has_overflow = False
+         api_info_struct = super().analyze_params(name, param_name, grad)
+         self.maybe_save_overflow_data()
+         return api_info_struct if self.has_overflow else None

      def maybe_save_overflow_data(self):
          if self.has_overflow:
@@ -190,3 +266,61 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
          self._analyze_maybe_overflow_tensor(single_arg)
          single_arg.update({"data_name": dump_data_name})
          return single_arg
+
+
+ class KernelDumpDataProcessor(MindsporeDataProcessor):
+     def __init__(self, config, data_writer):
+         super().__init__(config, data_writer)
+         self.enable_kernel_dump = True
+
+     @staticmethod
+     def start_kernel_dump(config_path):
+         hal.synchronize()
+         _msprobe_c.init_dump()
+         _msprobe_c.set_dump(config_path)
+         hal.synchronize()
+
+     @staticmethod
+     def stop_kernel_dump():
+         hal.synchronize()
+         _msprobe_c.finalize_dump()
+         hal.synchronize()
+
+     @staticmethod
+     def _print_unsupported_log(api_name):
+         logger.warning(f"The kernel dump does not support the {api_name} API.")
+
+     def analyze_forward_input(self, name, module, module_input_output):
+         if not self.enable_kernel_dump:
+             return
+         if not has_adump:
+             logger.warning("The current msprobe package does not compile adump, and kernel dump cannot be used.")
+             self.enable_kernel_dump = False
+             return
+         self.start_kernel_dump(self.config.kernel_config_path)
+
+     def analyze_forward_output(self, name, module, module_input_output):
+         if not self.enable_kernel_dump:
+             return
+         self.enable_kernel_dump = False
+         self.stop_kernel_dump()
+         logger.info(f"The kernel data of {name} is dumped successfully.")
+
+     def analyze_backward_input(self, name, module, module_input_output):
+         if not self.enable_kernel_dump:
+             return
+         if not has_adump:
+             logger.warning("The current msprobe package does not compile adump, and kernel dump cannot be used.")
+             self.enable_kernel_dump = False
+             return
+         self.start_kernel_dump(self.config.kernel_config_path)
+
+     def analyze_backward(self, name, module, module_input_output):
+         if not self.enable_kernel_dump:
+             return
+         self.enable_kernel_dump = False
+         self.stop_kernel_dump()
+         logger.info(f"The kernel data of {name} is dumped successfully.")
+
+     def reset_status(self):
+         self.enable_kernel_dump = True
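
The new KernelDumpDataProcessor brackets a single operator: it synchronizes the device, calls `_msprobe_c.init_dump()` and `_msprobe_c.set_dump(config_path)` before the call, and `_msprobe_c.finalize_dump()` after it. A hedged sketch of the same bracketing as a context manager; it assumes the optional `msprobe.lib._msprobe_c` extension (adump) is compiled in, which the diff itself guards with a try/except:

    from contextlib import contextmanager

    from mindspore import hal

    try:
        from msprobe.lib import _msprobe_c  # optional adump extension, as in the diff
        HAS_ADUMP = True
    except ImportError:
        HAS_ADUMP = False


    @contextmanager
    def kernel_dump(config_path):
        # Hypothetical convenience wrapper around the start/stop calls shown above.
        if not HAS_ADUMP:
            yield  # adump not built; run the operator without kernel dump
            return
        hal.synchronize()
        _msprobe_c.init_dump()
        _msprobe_c.set_dump(config_path)
        hal.synchronize()
        try:
            yield
        finally:
            hal.synchronize()
            _msprobe_c.finalize_dump()
            hal.synchronize()

    # Usage sketch:
    # with kernel_dump("./kernel_config.json"):
    #     out = some_npu_operator(x)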