mindstudio-probe 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/METADATA +7 -6
- mindstudio_probe-1.2.1.dist-info/RECORD +396 -0
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/WHEEL +1 -1
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/entry_points.txt +0 -1
- msprobe/CMakeLists.txt +5 -0
- msprobe/README.md +51 -20
- msprobe/config.json +2 -3
- msprobe/core/advisor/advisor.py +8 -3
- msprobe/core/common/const.py +264 -15
- msprobe/core/common/exceptions.py +27 -3
- msprobe/core/common/file_utils.py +176 -26
- msprobe/core/common/inplace_op_checker.py +15 -0
- msprobe/core/common/inplace_ops.yaml +3 -0
- msprobe/core/common/log.py +27 -9
- msprobe/core/common/utils.py +204 -77
- msprobe/core/common_config.py +49 -14
- msprobe/core/compare/acc_compare.py +274 -198
- msprobe/core/compare/check.py +32 -33
- msprobe/core/compare/compare_cli.py +32 -14
- msprobe/core/compare/highlight.py +283 -127
- msprobe/core/compare/layer_mapping/__init__.py +19 -0
- msprobe/core/compare/layer_mapping/data_scope_parser.py +246 -0
- msprobe/core/compare/layer_mapping/layer_mapping.py +249 -0
- msprobe/core/compare/layer_mapping/postprocess_pass.py +95 -0
- msprobe/core/compare/merge_result/merge_result.py +380 -0
- msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
- msprobe/core/compare/multiprocessing_compute.py +2 -2
- msprobe/core/compare/npy_compare.py +135 -144
- msprobe/core/compare/utils.py +419 -274
- msprobe/core/data_dump/data_collector.py +60 -28
- msprobe/core/data_dump/data_processor/base.py +84 -36
- msprobe/core/data_dump/data_processor/factory.py +5 -3
- msprobe/core/data_dump/data_processor/mindspore_processor.py +152 -18
- msprobe/core/data_dump/data_processor/pytorch_processor.py +267 -110
- msprobe/core/data_dump/json_writer.py +29 -1
- msprobe/core/data_dump/scope.py +119 -39
- msprobe/core/grad_probe/constant.py +27 -13
- msprobe/core/grad_probe/grad_compare.py +18 -1
- msprobe/core/grad_probe/utils.py +30 -2
- msprobe/core/overflow_check/abnormal_scene.py +189 -0
- msprobe/core/overflow_check/api_info.py +55 -0
- msprobe/core/overflow_check/checker.py +138 -0
- msprobe/core/overflow_check/filter.py +157 -0
- msprobe/core/overflow_check/ignore_rules.yaml +55 -0
- msprobe/core/overflow_check/level.py +22 -0
- msprobe/core/overflow_check/utils.py +28 -0
- msprobe/docs/01.installation.md +96 -7
- msprobe/docs/02.config_introduction.md +50 -23
- msprobe/docs/03.config_examples.md +2 -9
- msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
- msprobe/docs/05.data_dump_PyTorch.md +93 -61
- msprobe/docs/06.data_dump_MindSpore.md +200 -95
- msprobe/docs/07.accuracy_checker_PyTorch.md +28 -28
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
- msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
- msprobe/docs/10.accuracy_compare_PyTorch.md +114 -50
- msprobe/docs/11.accuracy_compare_MindSpore.md +340 -48
- msprobe/docs/12.overflow_check_PyTorch.md +2 -2
- msprobe/docs/13.overflow_check_MindSpore.md +6 -6
- msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
- msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
- msprobe/docs/17.grad_probe.md +5 -6
- msprobe/docs/19.monitor.md +561 -0
- msprobe/docs/20.monitor_performance_baseline.md +52 -0
- msprobe/docs/21.visualization_PyTorch.md +466 -0
- msprobe/docs/22.visualization_MindSpore.md +481 -0
- msprobe/docs/23.generate_operator_PyTorch.md +107 -0
- msprobe/docs/24.code_mapping_Mindspore.md +28 -0
- msprobe/docs/25.tool_function_introduction.md +29 -0
- msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
- msprobe/docs/27.dump_json_instruction.md +521 -0
- msprobe/docs/FAQ.md +29 -2
- msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
- msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
- msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +211 -0
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/merge_result.png +0 -0
- msprobe/docs/img/monitor/cpu_info.png +0 -0
- msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
- msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
- msprobe/docs/img/visualization/tensorboard_1.png +0 -0
- msprobe/docs/img/visualization/tensorboard_2.png +0 -0
- msprobe/docs/img/visualization/vis_browser_1.png +0 -0
- msprobe/docs/img/visualization/vis_browser_2.png +0 -0
- msprobe/docs/img/visualization/vis_precision_info.png +0 -0
- msprobe/docs/img/visualization/vis_search_info.png +0 -0
- msprobe/docs/img/visualization/vis_show_info.png +0 -0
- msprobe/docs/img/visualization/vis_showcase.png +0 -0
- msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
- msprobe/docs/visualization/GPTModel.png +0 -0
- msprobe/docs/visualization/ParallelMLP.png +0 -0
- msprobe/docs/visualization/layer_mapping_example.md +132 -0
- msprobe/docs/visualization/mapping.png +0 -0
- msprobe/docs/visualization/mapping1.png +0 -0
- msprobe/docs/visualization/module_name.png +0 -0
- msprobe/docs/visualization/module_name1.png +0 -0
- msprobe/docs/visualization/no_mapping.png +0 -0
- msprobe/docs/visualization/no_mapping1.png +0 -0
- msprobe/docs/visualization/no_mapping_analyze.png +0 -0
- msprobe/docs/visualization/top_layer.png +0 -0
- msprobe/mindspore/__init__.py +25 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -151
- msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
- msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
- msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +64 -1
- msprobe/mindspore/api_accuracy_checker/compute_element.py +64 -31
- msprobe/mindspore/api_accuracy_checker/data_manager.py +301 -0
- msprobe/mindspore/api_accuracy_checker/main.py +28 -3
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +212 -0
- msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +60 -0
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
- msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
- msprobe/mindspore/cell_processor.py +33 -12
- msprobe/mindspore/code_mapping/bind.py +264 -0
- msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
- msprobe/mindspore/code_mapping/graph.py +49 -0
- msprobe/mindspore/code_mapping/graph_parser.py +226 -0
- msprobe/mindspore/code_mapping/main.py +24 -0
- msprobe/mindspore/code_mapping/processor.py +34 -0
- msprobe/mindspore/common/const.py +35 -13
- msprobe/mindspore/common/log.py +5 -9
- msprobe/mindspore/common/utils.py +88 -4
- msprobe/mindspore/compare/distributed_compare.py +22 -24
- msprobe/mindspore/compare/ms_compare.py +333 -268
- msprobe/mindspore/compare/ms_graph_compare.py +95 -52
- msprobe/mindspore/debugger/debugger_config.py +7 -1
- msprobe/mindspore/debugger/precision_debugger.py +87 -12
- msprobe/mindspore/dump/dump_tool_factory.py +3 -1
- msprobe/mindspore/dump/hook_cell/api_registry.py +95 -18
- msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +45 -30
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +36 -1
- msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
- msprobe/mindspore/dump/jit_dump.py +17 -5
- msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
- msprobe/mindspore/dump/kernel_graph_dump.py +9 -4
- msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +156 -41
- msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
- msprobe/mindspore/free_benchmark/common/utils.py +19 -4
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
- msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
- msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
- msprobe/mindspore/grad_probe/global_context.py +28 -8
- msprobe/mindspore/grad_probe/grad_analyzer.py +50 -24
- msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
- msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
- msprobe/mindspore/grad_probe/hook.py +35 -12
- msprobe/mindspore/grad_probe/utils.py +18 -5
- msprobe/mindspore/mindtorch/__init__.py +18 -0
- msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
- msprobe/mindspore/ms_config.py +27 -16
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +9 -4
- msprobe/mindspore/runtime.py +15 -0
- msprobe/mindspore/service.py +285 -113
- msprobe/mindspore/task_handler_factory.py +15 -0
- msprobe/msprobe.py +48 -10
- msprobe/pytorch/__init__.py +8 -6
- msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
- msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +103 -271
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
- msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
- msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +478 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +63 -2
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +21 -15
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +54 -22
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +140 -71
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +49 -8
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
- msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
- msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +142 -16
- msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
- msprobe/pytorch/bench_functions/swiglu.py +10 -2
- msprobe/pytorch/common/parse_json.py +7 -6
- msprobe/pytorch/common/utils.py +101 -7
- msprobe/pytorch/compare/distributed_compare.py +17 -30
- msprobe/pytorch/compare/pt_compare.py +44 -22
- msprobe/pytorch/debugger/debugger_config.py +46 -27
- msprobe/pytorch/debugger/precision_debugger.py +42 -12
- msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
- msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
- msprobe/pytorch/{module_processer.py → dump/module_dump/module_processer.py} +81 -10
- msprobe/pytorch/free_benchmark/common/constant.py +15 -0
- msprobe/pytorch/free_benchmark/common/counter.py +15 -0
- msprobe/pytorch/free_benchmark/common/enums.py +15 -0
- msprobe/pytorch/free_benchmark/common/params.py +10 -2
- msprobe/pytorch/free_benchmark/common/utils.py +29 -4
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -5
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +41 -47
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
- msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
- msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
- msprobe/pytorch/hook_module/__init__.py +1 -1
- msprobe/pytorch/hook_module/hook_module.py +14 -11
- msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +35 -0
- msprobe/pytorch/hook_module/wrap_distributed.py +6 -8
- msprobe/pytorch/hook_module/wrap_functional.py +0 -38
- msprobe/pytorch/monitor/__init__.py +0 -0
- msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
- msprobe/pytorch/monitor/anomaly_detect.py +425 -0
- msprobe/pytorch/monitor/csv2tb.py +166 -0
- msprobe/pytorch/monitor/distributed/__init__.py +0 -0
- msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
- msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
- msprobe/pytorch/monitor/distributed/wrap_distributed.py +283 -0
- msprobe/pytorch/monitor/features.py +108 -0
- msprobe/pytorch/monitor/module_hook.py +1076 -0
- msprobe/pytorch/monitor/module_metric.py +172 -0
- msprobe/pytorch/monitor/module_spec_verifier.py +95 -0
- msprobe/pytorch/monitor/optimizer_collect.py +333 -0
- msprobe/pytorch/monitor/unittest/__init__.py +0 -0
- msprobe/pytorch/monitor/unittest/test_monitor.py +160 -0
- msprobe/pytorch/monitor/utils.py +321 -0
- msprobe/pytorch/monitor/visualizer.py +59 -0
- msprobe/pytorch/online_dispatch/__init__.py +2 -3
- msprobe/pytorch/online_dispatch/compare.py +29 -38
- msprobe/pytorch/online_dispatch/dispatch.py +58 -27
- msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
- msprobe/pytorch/online_dispatch/single_compare.py +53 -32
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
- msprobe/pytorch/online_dispatch/utils.py +49 -21
- msprobe/pytorch/parse_tool/lib/compare.py +21 -27
- msprobe/pytorch/parse_tool/lib/config.py +6 -8
- msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
- msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
- msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
- msprobe/pytorch/parse_tool/lib/parse_tool.py +12 -12
- msprobe/pytorch/parse_tool/lib/utils.py +33 -53
- msprobe/pytorch/parse_tool/lib/visualization.py +11 -10
- msprobe/pytorch/pt_config.py +31 -8
- msprobe/pytorch/service.py +188 -108
- msprobe/visualization/__init__.py +14 -0
- msprobe/visualization/builder/__init__.py +14 -0
- msprobe/visualization/builder/graph_builder.py +222 -0
- msprobe/visualization/builder/msprobe_adapter.py +227 -0
- msprobe/visualization/compare/__init__.py +14 -0
- msprobe/visualization/compare/graph_comparator.py +180 -0
- msprobe/visualization/compare/mode_adapter.py +197 -0
- msprobe/visualization/graph/__init__.py +14 -0
- msprobe/visualization/graph/base_node.py +119 -0
- msprobe/visualization/graph/distributed_analyzer.py +318 -0
- msprobe/visualization/graph/graph.py +209 -0
- msprobe/visualization/graph/node_colors.py +95 -0
- msprobe/visualization/graph/node_op.py +39 -0
- msprobe/visualization/graph_service.py +288 -0
- msprobe/visualization/utils.py +217 -0
- mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
- msprobe/docs/04.acl_config_examples.md +0 -78
- msprobe/mindspore/compare/layer_mapping.py +0 -146
- msprobe/mindspore/compare/modify_mapping.py +0 -107
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
- msprobe/pytorch/functional/module_dump.py +0 -84
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/top_level.txt +0 -0
- /msprobe/mindspore/{free_benchmark/decorator → code_mapping}/__init__.py +0 -0
- /msprobe/pytorch/{functional → dump/module_dump}/__init__.py +0 -0

--- a/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py
+++ b/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py
@@ -16,7 +16,6 @@
 import glob
 import os.path
 import time
-import re
 from multiprocessing import Queue
 from typing import Optional, Union, Dict, Any
 from dataclasses import dataclass
@@ -26,9 +25,8 @@ import torch
 from msprobe.pytorch.api_accuracy_checker.common.utils import ApiData
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.client import TCPClient
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.server import TCPServer
-from msprobe.pytorch.common.utils import logger
 from msprobe.core.common.file_utils import remove_path
-from msprobe.pytorch.common.utils import save_api_data, load_api_data,
+from msprobe.pytorch.common.utils import logger, save_api_data, load_api_data, save_pkl, load_pkl
 
 BufferType = Union[ApiData, Dict[str, Any], str]  # Union[Tensor, Tuple[Optional[Tensor]]]
 
@@ -55,7 +53,6 @@ class ATTL:
         self.dequeue_list = []
         self.message_end = False
         self.kill_progress = False
-        self.check_attl_config()
         self.nfs_path = None
         if self.session_config.nfs_path:
             self.nfs_path = self.session_config.nfs_path
@@ -73,18 +70,6 @@ class ATTL:
                                               self.session_config.tls_path)
             self.socket_manager.start()
 
-    def check_attl_config(self):
-        if self.session_config.nfs_path:
-            if os.path.exists(self.session_config.nfs_path):
-                return
-            else:
-                raise Exception(f"nfs path {self.session_config.nfs_path} doesn't exists.")
-        ipv4_pattern = "([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5])(\.([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5])){3}$"
-        if not re.match(ipv4_pattern, self.session_config.connect_ip):
-            raise Exception(f"host {self.session_config.connect_ip} is invalid.")
-        if not (0 < self.session_config.connect_port <= 65535):
-            raise Exception(f"port {self.session_config.connect_port} is invalid.")
-
     def stop_serve(self):
         if isinstance(self.socket_manager, TCPServer):
             self.socket_manager.stop()
@@ -115,21 +100,21 @@ class ATTL:
             self.socket_manager.add_to_sending_queue(data, rank=rank, step=step)
 
     def recv(self, timeout_ms=0) -> Optional[BufferType]:
-        buffer =
-        while buffer
+        buffer = ''
+        while not buffer:
             if timeout_ms > 0:
                 time.sleep(timeout_ms / 1000.0)
-            if buffer
+            if not buffer and not self.data_queue.empty():
                 buffer = self.data_queue.get()
                 break
-            if buffer
+            if not buffer and timeout_ms > 0:  # timeout is the only case we give up and return None
                 break
             if self.message_end and self.data_queue.empty():
                 buffer = b"KILL_CONFIRM"
                 self.kill_progress = True
                 break
             time.sleep(0.1)  # waiting outside the lock before next attempt
-        if buffer
+        if not buffer:
             # this is a result of a timeout
             self.logger.info(f"RECEIVE API DATA TIMED OUT")
         else:
@@ -146,7 +131,7 @@ class ATTL:
         except Exception as e:
             self.logger.warning("there is something error. please check it. %s", e)
         if isinstance(buffer, bytes):
-            return
+            return ''
         if isinstance(buffer, str):
             return buffer
 
@@ -160,7 +145,7 @@ class ATTL:
             file_path = os.path.join(self.session_config.nfs_path, buffer + f"_{int(time.time())}")
 
         try:
-
+            save_pkl(buffer, file_path)
         except Exception as e:
             self.logger.warning("there is something error in save_pt. please check it. %s", e)
 
@@ -176,7 +161,7 @@ class ATTL:
 
         if cur_file is not None:
             try:
-                buffer =
+                buffer = load_pkl(cur_file)
             except Exception as e:
                 self.logger.warning("there is something error. please check it. %s", e)
             remove_path(cur_file)

--- a/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py
+++ b/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py
@@ -27,8 +27,8 @@ from twisted.internet import reactor, protocol, endpoints
 from twisted.protocols.basic import FileSender
 
 from msprobe.pytorch.common.utils import logger
-from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.utils import
-
+from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.utils import STRUCT_UNPACK_MODE as unpack_mode, \
+    STR_TO_BYTES_ORDER as bytes_order
 
 MAX_SENDING_QUEUE_SIZE = 20
 
@@ -84,15 +84,6 @@ class TCPClient:
         def run_reactor():
             reactor.run(installSignalHandlers=False)
 
-    def check_tls_path(self):
-        client_key = os.path.join(self.tls_path, "client.key")
-        client_crt = os.path.join(self.tls_path, "client.crt")
-        if not os.path.exists(client_key):
-            raise Exception(f"client_key: {client_key} is not exists.")
-        if not os.path.exists(client_crt):
-            raise Exception(f"client_crt: {client_crt} is not exists.")
-        return client_key, client_crt
-
     def start(self):
         def conn_callback(cur_protocol):
             if cur_protocol.transport and cur_protocol.transport.getPeer().host == self.host:
@@ -114,7 +105,8 @@ class TCPClient:
         self.factory.protocol = cur_protocol
         if self.tls_path:
             from twisted.internet import ssl
-            client_key
+            client_key = os.path.join(self.tls_path, "client.key")
+            client_crt = os.path.join(self.tls_path, "client.crt")
             client_context_factory = ssl.DefaultOpenSSLContextFactory(client_key, client_crt)
             endpoint = endpoints.SSL4ClientEndpoint(reactor, self.host, self.port, client_context_factory)
         else:

--- a/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py
+++ b/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py
@@ -24,7 +24,7 @@ from msprobe.core.common.const import Const, CompareConst
 from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import online_api_precision_compare
 from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TEST_ROWS, thousandth_standard_api, \
     binary_standard_api, absolute_standard_api
-from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
+from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api, ExecParams
 from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
 from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params
@@ -92,8 +92,10 @@ def online_precision_compare(api_data, device, common_config, api_precision_csv_
 
     try:
         # NPU vs CPU
-
-
+        cpu_params = generate_cpu_params(npu_args, npu_kwargs, False, api_name)
+        cpu_args, cpu_kwargs = cpu_params.cpu_args, cpu_params.cpu_kwargs
+        cpu_exec_params = ExecParams(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs, False, None)
+        cpu_out = exec_api(cpu_exec_params)
         npu_data_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
         npu_detail = compare.compare_output(api_full_name, npu_data_info, True)
         npu_data = pd.DataFrame(npu_detail, columns=DETAIL_TEST_ROWS[-1])

--- a/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py
+++ b/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py
@@ -1,3 +1,4 @@
+
 # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
@@ -14,6 +15,7 @@
 # limitations under the License.
 
 import os
+from collections import defaultdict
 from functools import wraps
 
 import torch
@@ -39,7 +41,7 @@ def singleton(cls):
 @singleton
 class Counter:
     def __init__(self) -> None:
-        self.index_dict =
+        self.index_dict = defaultdict(int)
 
 
 counter = Counter()
@@ -67,9 +69,9 @@ class AccuracyCheckerDispatch(TorchDispatchMode):
 
         res = func(*args, **kwargs)
         cur_rank = get_tensor_rank(args, res)
-        cur_api_number = self.counter.index_dict
+        cur_api_number = self.counter.index_dict[aten_api]
         api_name = f'{Const.ATEN}{Const.SEP}{aten_api}{Const.SEP}{cur_api_number}'
-        logger.info(f"tools is dumping api: {api_name}")
+        logger.info(f"tools is dumping api: {api_name}, rank: {cur_rank}")
         api_data = ApiData(api_name, args, kwargs, res, 0, cur_rank)
         if "device" in api_data.kwargs:
             api_data.kwargs.pop("device")
@@ -98,7 +100,7 @@ def dispatch4data(func, attl, status):
     return wrapper
 
 
-def run_ut_dispatch(attl, status):
+def run_ut_dispatch(attl, status, is_recompute=False):
     """
     This function called by online_run_ut.
     It is used to enable or disable dispatch for torch.autograd.backward function.
@@ -106,5 +108,8 @@ def run_ut_dispatch(attl, status):
     Args:
         attl (ATTL): online_run_ut class ATTL, which is used to upload or send api data to server.
         status (bool): True means enable dispatch, False means disable dispatch.
+        is_recompute (bool): Flag of recompute, which is conflicted with aten api, then skip dispatch4data.
     """
+    if is_recompute:
+        return
     torch.autograd.backward = dispatch4data(torch.autograd.backward, attl, status)

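The Counter change above swaps a plain dict for collections.defaultdict(int), so each aten API name gets a per-name counter without an explicit key-existence check. A minimal sketch of that pattern; the helper and API names below are illustrative, not the tool's actual code:

```python
from collections import defaultdict

index_dict = defaultdict(int)  # missing keys start at 0


def next_index(aten_api: str) -> int:
    """Return the current index for an API name and advance its counter."""
    current = index_dict[aten_api]  # no KeyError even on first use
    index_dict[aten_api] += 1
    return current


# hypothetical API names, only to show the numbering behaviour
print(next_index("aten.add"))  # 0
print(next_index("aten.add"))  # 1
print(next_index("aten.mul"))  # 0
```
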
--- a/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py
+++ b/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py
@@ -24,7 +24,7 @@ from twisted.internet import reactor, protocol, endpoints
 
 from msprobe.pytorch.common.utils import logger
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.utils import cipher_list, \
-
+    STRUCT_UNPACK_MODE as unpack_mode, STR_TO_BYTES_ORDER as bytes_order
 
 
 class TCPServer:
@@ -40,22 +40,14 @@ class TCPServer:
         def run_reactor():
             reactor.run(installSignalHandlers=False)
 
-    def check_tls_path(self):
-        server_key = os.path.join(self.tls_path, "server.key")
-        server_crt = os.path.join(self.tls_path, "server.crt")
-        if not os.path.exists(server_key):
-            raise Exception(f"server_key: {server_key} is not exists.")
-        if not os.path.exists(server_crt):
-            raise Exception(f"server_crt: {server_crt} is not exists.")
-        return server_key, server_crt
-
     def start(self):
         self.factory.protocol = self.build_protocol
 
         if self.tls_path:
             from OpenSSL import SSL
             from twisted.internet import ssl
-            server_key
+            server_key = os.path.join(self.tls_path, "server.key")
+            server_crt = os.path.join(self.tls_path, "server.crt")
             server_context_factory = ssl.DefaultOpenSSLContextFactory(server_key, server_crt, SSL.TLSv1_2_METHOD)
             server_context_ = server_context_factory.getContext()
             server_context_.set_cipher_list(cipher_list)

--- a/msprobe/pytorch/bench_functions/confusion_transpose.py
+++ b/msprobe/pytorch/bench_functions/confusion_transpose.py
@@ -22,7 +22,11 @@ def npu_confusion_transpose(data, perm, shape, transpose_first):
 
 
 def npu_confusion_transpose_backward(grad, perm, shape, transpose_first):
-
+    try:
+        shape_cal = shape if transpose_first else [shape[perm_dim] for perm_dim in perm]
+    except IndexError as e:
+        raise IndexError("npu_confusion_transpose_backward: Invalid perm index for shape") from e
+
     perm_cal = [0] * len(perm)
     for i, perm_dim in enumerate(perm):
         perm_cal[perm_dim] = i

--- a/msprobe/pytorch/bench_functions/matmul_backward.py
+++ b/msprobe/pytorch/bench_functions/matmul_backward.py
@@ -17,6 +17,9 @@ import torch
 
 
 def matmul_backward(grad, self, other, mask):
+    if len(mask) < 2:
+        raise RuntimeError("Mask size at least 2")
+
     grad_self, grad_other = None, None
     dim_self = self.dim()
     dim_other = other.dim()
@@ -24,6 +27,7 @@ def matmul_backward(grad, self, other, mask):
     size_grad = list(grad.size())
     size_self = list(self.size())
     size_other = list(other.size())
+
     if dim_self == 1 and dim_other == 1:
         grad_self = other.mul(grad) if mask[0] else grad_self
         grad_other = self.mul(grad) if mask[1] else grad_other
@@ -34,19 +38,27 @@ def matmul_backward(grad, self, other, mask):
         grad_self = grad.unsqueeze(0).mm(other.transpose(-1, -2)).squeeze_(0) if mask[0] else grad_self
         grad_other = self.unsqueeze(1).mm(grad.unsqueeze(0)) if mask[1] else grad_other
     elif dim_self >= 3 and (dim_other == 1 or dim_other == 2):
+        if len(size_grad) < 1:
+            raise RuntimeError("size_grad's length at least 1")
         view_size = 1 if dim_other == 1 else size_grad[-1]
         unfolded_grad = (grad.unsqueeze(-1) if dim_other == 1 else grad).contiguous().view(-1, view_size)
         if mask[0]:
             grad_self = unfolded_grad.mm(other.unsqueeze(0) if dim_other == 1 else other.transpose(-1, -2)) \
                 .view(size_self)
         if mask[1]:
+            if len(size_self) < 1:
+                raise RuntimeError("size_self's length at least 1")
             unfolded_self = self.contiguous().view([-1, size_self[-1]])
             grad_other = unfolded_self.transpose(-1, -2).mm(unfolded_grad).view(size_other)
     elif (dim_self == 1 or dim_self == 2) and dim_other >= 3:
+        if len(size_grad) < 2:
+            raise RuntimeError("size_grad's length at least 2")
         view_size = 1 if dim_self == 1 else size_grad[-2]
         unfolded_grad_t = grad.view([-1, view_size]) \
             if dim_self == 1 else grad.transpose(-1, -2).contiguous().view([-1, view_size])
         if mask[0]:
+            if len(size_other) < 2:
+                raise RuntimeError("size_other's length at least 2")
             # create a 2D-matrix from other
             unfolded_other_t = \
                 other.transpose(-1, -2).contiguous().view([-1, size_other[-2]]).transpose(-1, -2)

--- a/msprobe/pytorch/bench_functions/npu_fusion_attention.py
+++ b/msprobe/pytorch/bench_functions/npu_fusion_attention.py
@@ -30,6 +30,7 @@
     numels=0, prefix=None, sparse_mode=0, gen_mask_parallel=True, sync=False
 """
 
+from collections import namedtuple
 import torch
 import numpy as np
 from einops import rearrange
@@ -50,8 +51,16 @@ else:
 from msprobe.pytorch.common.utils import logger
 from msprobe.core.common.const import Const, CompareConst
 
-
-
+GTYPE = torch.float64  # arm hosts must use float64; on x86, float32 is enough (float64 also works). arm is slow, so x86 is recommended for s=8k cases
+SOFTMAX_BUILD_MODE = "QKV"  # "MAX_SUM"
+
+
+FaForwardParams = namedtuple("FaForwardParams",
+                             ["q", "k", "v", "drop_mask", "atten_mask", "pse", "scale", "keep_prob"])
+FaBackwardParams = namedtuple("FaBackwardParams",
+                              ["dx", "q", "k", "v", "softmax_res", "drop_mask", "pse", "scale", "keep_prob"])
+RebuildSoftmaxParams = namedtuple("RebuildSoftmaxParams",
+                                  ["q", "k", "atten_mask", "pse", "scale", "softmax_max", "softmax_sum"])
 
 
 def softmax_forward(x):
@@ -99,7 +108,15 @@ def calculate_qk(q, k, atten_mask, pse, scale):
     return qk
 
 
-def fusion_attention_forward(
+def fusion_attention_forward(forward_params):
+    q = forward_params.q
+    k = forward_params.k
+    v = forward_params.v
+    drop_mask = forward_params.drop_mask
+    atten_mask = forward_params.atten_mask
+    pse = forward_params.pse
+    scale = forward_params.scale
+    keep_prob = forward_params.keep_prob
     qk = calculate_qk(q, k, atten_mask, pse, scale)
     softmax_res, softmax_max, softmax_sum = softmax_forward(qk)
     if drop_mask is None or len(drop_mask.shape) == 0:
@@ -110,7 +127,16 @@ def fusion_attention_forward(q, k, v, drop_mask, atten_mask, pse, scale, keep_pr
     return y, softmax_max, softmax_sum
 
 
-def fusion_attention_backward(
+def fusion_attention_backward(backward_params):
+    dx = backward_params.dx
+    q = backward_params.q
+    k = backward_params.k
+    v = backward_params.v
+    softmax_res = backward_params.softmax_res
+    drop_mask = backward_params.drop_mask
+    pse = backward_params.pse
+    scale = backward_params.scale
+    keep_prob = backward_params.keep_prob
     dp = torch.matmul(dx, v.permute(0, 1, 3, 2))
     if drop_mask is None or len(drop_mask.shape) == 0:
         drop_res = softmax_res.permute(0, 1, 3, 2)
@@ -166,6 +192,18 @@ def parse_bsnd_args(query, key, head_num, input_layout):
 
 
 def convert_from_bnsd(_input, input_layout):
+    """
+    transform qkv from bnsd to input_layout.
+    B: batch_size
+    S: sequence_length
+    N: num_heads
+    D: head_dim
+    Args:
+        _input (torch.Tensor): tensor of shape (B,N,S,D)
+        input_layout (str): "BSH" or "SBH" or "BSND" or "BNSD" or "TND"
+    Returns:
+        tensor of shape (B,N,S,D) or (B,S,N,D) or (S,B,H) or (B,S,H)
+    """
     if input_layout == "BSH":
         # (B,N,S,D)=>(B,S,N*D)
         out = rearrange(_input, 'b n s d -> b s (n d)').contiguous()
@@ -183,7 +221,19 @@ def convert_from_bnsd(_input, input_layout):
 
 
 def convert_to_bnsd(_input, n, input_layout):
-
+    """
+    transform qkv from input_layout to bnsd.
+    B: batch_size
+    S: sequence_length
+    N: num_heads
+    D: head_dim
+    Args:
+        _input (torch.Tensor): tensor of shape (B,N,S,D) or (B,S,N,D) or (S,B,H) or (B,S,H)
+        n (int): num_heads
+        input_layout (str):"BSH" or "SBH" or "BSND" or "BNSD" or "TND"
+    Returns:
+        tensor of shape (B,N,S,D)
+    """
     if input_layout == "BSH":
         # (B,S,N*D)=>(B,N,S,D)
         out = rearrange(_input, 'b s (n d) -> b n s d', n=n)
@@ -199,7 +249,68 @@ def convert_to_bnsd(_input, n, input_layout):
         out = _input
     if out.dim() != 4:
         raise ValueError(f"convert qkv format failed with input_layout {input_layout}.")
-    return out.to(
+    return out.to(GTYPE)
+
+
+def convert_from_bsnd(_input, input_layout):
+    """
+    transform qkv from bsnd to input_layout.
+    B: batch_size
+    S: sequence_length
+    N: num_heads
+    D: head_dim
+    Args:
+        _input (torch.Tensor): tensor of shape (B,S,N,D)
+        input_layout (str): "BSH" or "SBH" or "BSND" or "BNSD" or "TND"
+    Returns:
+        tensor of shape (B,N,S,D) or (B,S,N,D) or (S,B,H) or (B,S,H)
+    """
+    if input_layout == "BSH":
+        # (B,S,N,D)=>(B,S,N*D)
+        out = rearrange(_input, 'b s n d -> b s (n d)').contiguous()
+    elif input_layout == "SBH":
+        # (B,S,N,D)=>(S,B,N*D)
+        out = rearrange(_input, 'b s n d -> s b (n d)').contiguous()
+    elif input_layout == "BNSD":
+        # (B,S,N,D)=>(B,N,S,D)
+        out = rearrange(_input, 'b s n d -> b n s d').contiguous()
+    elif input_layout == "TND":
+        raise ValueError(f"input_layout {input_layout} does not supported for now.")
+    else:
+        out = _input
+    return out
+
+
+def convert_to_bsnd(_input, n, input_layout):
+    """
+    transform qkv from input_layout to bsnd.
+    B: batch_size
+    S: sequence_length
+    N: num_heads
+    D: head_dim
+    Args:
+        _input (torch.Tensor): tensor of shape (B,N,S,D) or (B,S,N,D) or (S,B,H) or (B,S,H)
+        n (int): num_heads
+        input_layout (str):"BSH" or "SBH" or "BSND" or "BNSD" or "TND"
+    Returns:
+        tensor of shape (B,S,N,D)
+    """
+    if input_layout == "BSH":
+        # (B,S,N*D)=>(B,S,N,D)
+        out = rearrange(_input, 'b s (n d) -> b s n d', n=n)
+    elif input_layout == "SBH":
+        # (S,B,N*D)=>(B,S,N,D)
+        out = rearrange(_input, 's b (n d) -> b s n d', n=n)
+    elif input_layout == "BNSD":
+        # (B,N,S,D)=>(B,S,N,D)
+        out = rearrange(_input, 'b n s d -> b s n d', n=n)
+    elif input_layout == "TND":
+        raise ValueError(f"input_layout {input_layout} does not supported for now.")
+    else:
+        out = _input
+    if out.dim() != 4:
+        raise ValueError(f"convert qkv format failed with input_layout {input_layout}.")
+    return out
 
 
 def generate_atten_mask(*args):
@@ -279,15 +390,22 @@ def rebuid_softmax_by_qkv(q, k, atten_mask, pse, scale):
     """
     logger.info("Using QKV to rebuild original softmax")
     qk = calculate_qk(q, k, atten_mask, pse, scale)
-    softmax_res,
+    softmax_res, _, _ = softmax_forward(qk)
     return softmax_res
 
 
-def rebuild_softmax_by_max_sum(
+def rebuild_softmax_by_max_sum(softmax_params):
     """
     attention = softmax(QK^T/sqrt(d))V
     softmax(x_i) = e^(x_i - x_max_i) / x_sum_i)
     """
+    q = softmax_params.q
+    k = softmax_params.k
+    atten_mask = softmax_params.atten_mask
+    pse = softmax_params.pse
+    scale = softmax_params.scale
+    softmax_max = softmax_params.softmax_max
+    softmax_sum = softmax_params.softmax_sum
     logger.info("Using softmax_max and softmax_sum to rebuild original softmax")
     qk = calculate_qk(q, k, atten_mask, pse, scale)
     if softmax_max.shape[-1] == 0:
@@ -319,6 +437,10 @@ def get_input_layout(*args, **kwargs):
 
 
 def npu_fusion_attention_forward_patch(*args, **kwargs):
+
+    if len(args) < 2:
+        raise RuntimeError("npu_fusion_attention_forward_patch: length of args should greater than or equal to 2.")
+
     # query, key, value, head_num, input_layout
     head_num = get_head_num(*args, **kwargs)
     input_layout = get_input_layout(*args, **kwargs)
@@ -413,10 +535,8 @@ def npu_fusion_attention(*args, **kwargs):
     key = convert_to_bnsd(key, n2, input_layout)
     value = convert_to_bnsd(value, n2, input_layout)
     k_new, v_new = generate_kv(key, value, n1, n2)
-
-
-                                                       pse=pse, scale=scale,
-                                                       keep_prob=keep_prob)
+    forward_params = FaForwardParams(query, k_new, v_new, None, atten_mask, pse, scale, keep_prob)
+    out_golden, softmax_max, softmax_sum = fusion_attention_forward(forward_params)
     if out_golden.dim() == 5:
         out_golden = out_golden.reshape(out_golden.size(0), out_golden.size(1) * out_golden.size(2), out_golden.size(3),
                                         out_golden.size(4))
@@ -454,12 +574,13 @@ def npu_fusion_attention_grad(*args, **kwargs):
     value = convert_to_bnsd(value, n2, input_layout)
     k_new, v_new = generate_kv(key, value, n1, n2)
 
-    if
+    if SOFTMAX_BUILD_MODE == "QKV":
         softmax_res = rebuid_softmax_by_qkv(query, k_new, atten_mask, pse, scale_value)
     else:
-
-
-
+        softmax_params = RebuildSoftmaxParams(query, k_new, atten_mask, pse, scale_value, softmax_max, softmax_sum)
+        softmax_res = rebuild_softmax_by_max_sum(softmax_params)
+    backward_params = FaBackwardParams(dx, query, k_new, v_new, softmax_res, None, pse, scale_value, keep_prob)
+    dq, dk, dv = fusion_attention_backward(backward_params)
 
     # adaptation for unequal N, by cdy
     if not (n1 == n2):
@@ -531,8 +652,13 @@ def gpu_fusion_attention(*args, **kwargs):
     else:
         alibi_slopes = None
 
+    input_layout = get_input_layout(*args, **kwargs)
+    query = convert_to_bsnd(query, n1, input_layout)
+    key = convert_to_bsnd(key, n2, input_layout)
+    value = convert_to_bsnd(value, n2, input_layout)
     out = flash_attn_func(
         query, key, value, dropout_p=(1 - keep_prob), softmax_scale=scale, causal=causal_switch,
         window_size=(window_left, window_right), alibi_slopes=alibi_slopes, deterministic=deterministic
     )
+    out = convert_from_bsnd(out, input_layout)
     return out, Const.NONE, Const.NONE

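Several of the golden-function helpers above (fusion_attention_forward, fusion_attention_backward, rebuild_softmax_by_max_sum) now receive a single namedtuple parameter object instead of a long positional argument list. A minimal sketch of that pattern using the FaForwardParams fields from the diff; the tensors below are placeholders, not real attention inputs:

```python
from collections import namedtuple

import torch

FaForwardParams = namedtuple("FaForwardParams",
                             ["q", "k", "v", "drop_mask", "atten_mask", "pse", "scale", "keep_prob"])

# placeholder tensors, only to show how the parameter object is built and read
q = k = v = torch.zeros(1, 1, 4, 8)
params = FaForwardParams(q=q, k=k, v=v, drop_mask=None, atten_mask=None,
                         pse=None, scale=0.125, keep_prob=1.0)

# a callee reads named fields instead of relying on positional order
print(params.scale, params.keep_prob)
```

Grouping the arguments this way keeps call sites such as fusion_attention_forward(forward_params) short and makes it harder to pass q, k and v in the wrong order.
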
--- a/msprobe/pytorch/bench_functions/rotary_mul.py
+++ b/msprobe/pytorch/bench_functions/rotary_mul.py
@@ -40,6 +40,9 @@ def npu_rotary_mul_backward(dy_tensor, x, r1, r2):
     x_shape = x.shape
     h = x.float()
     grad = dy_tensor.float()
+    if len(r1_shape) < 4 or len(x_shape) < 4:
+        raise RuntimeError(f"Shape of r1 and x should at least be 4-dimension, "
+                           f"but got r1 shape:{r1_shape}, x shape:{x_shape}")
     condition_1 = (r1_shape[0] == 1
                    and r1_shape[1] == x_shape[1]
                    and r1_shape[2] == 1
@@ -68,4 +71,5 @@ def npu_rotary_mul_backward(dy_tensor, x, r1, r2):
         for j in range(x_shape[2]):
             r2_grad[:, 0, 0, :] += (x_new2[:, i, j, :] * grad[:, i, j, :])
             r1_grad[:, 0, 0, :] += (h[:, i, j, :] * grad[:, i, j, :])
+
     return x.grad.cpu(), r1_grad.cpu(), r2_grad.cpu()

--- a/msprobe/pytorch/bench_functions/swiglu.py
+++ b/msprobe/pytorch/bench_functions/swiglu.py
@@ -19,7 +19,11 @@ import torch
 def npu_swiglu(x, dim=-1):
     tensor_dtype = x.dtype
 
-
+    try:
+        in_tensors = torch.chunk(x, 2, dim=dim)
+    except Exception as e:
+        raise RuntimeError(f"Invalid chunk x into 2 tensors with shape {x.shape} and dimension {dim}") from e
+
     if tensor_dtype == torch.float32:
         tensor_scalar = torch.sigmoid(torch.mul(in_tensors[0], 1.0))
         output_data = torch.mul(torch.mul(tensor_scalar, in_tensors[0]), in_tensors[1])
@@ -34,7 +38,11 @@ def npu_swiglu(x, dim=-1):
 
 def npu_swiglu_backward(grad, x, dim=-1):
     tensor_dtype = grad.dtype
-
+    try:
+        in_tensors = torch.chunk(x, 2, dim=dim)
+    except Exception as e:
+        raise RuntimeError(f"Invalid chunk x into 2 tensors with shape {x.shape} and dimension {dim}") from e
+
     tensor_grad_out = grad
 
     if tensor_dtype == torch.float16:

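For context on the chunk guard added above: npu_swiglu splits x into two halves along dim and gates one half with the sigmoid-weighted other half, so an input that cannot be split in two is a hard error. A rough sketch of the float32 forward path shown in the diff (illustrative only, not a drop-in replacement for the bench function):

```python
import torch


def swiglu_reference(x: torch.Tensor, dim: int = -1) -> torch.Tensor:
    # split the input into two equal halves along `dim`
    a, b = torch.chunk(x, 2, dim=dim)
    # SwiGLU: sigmoid(a) * a acts as the gate applied to b
    return torch.sigmoid(a) * a * b


out = swiglu_reference(torch.randn(2, 8), dim=-1)
print(out.shape)  # torch.Size([2, 4])
```
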
--- a/msprobe/pytorch/common/parse_json.py
+++ b/msprobe/pytorch/common/parse_json.py
@@ -13,20 +13,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
-
 from msprobe.core.common.exceptions import ParseJsonException
-from msprobe.core.common.file_utils import
+from msprobe.core.common.file_utils import load_json
+from msprobe.core.common.log import logger
 
 
 def parse_json_info_forward_backward(json_path):
-
-    dump_json = json.load(f)
+    dump_json = load_json(json_path)
 
     real_data_path = dump_json.get("dump_data_dir")
     dump_data = dump_json.get("data")
+    if dump_data is None:
+        raise ParseJsonException(ParseJsonException.InvalidDumpJson,
+                                 "something wrong with dump, no data found in dump.json")
     if not dump_data:
-
+        logger.warning("data field is empty, no overflow data found.")
 
     forward_data = {}
     backward_data = {}
|