mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/METADATA +4 -2
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/RECORD +204 -152
- msprobe/README.md +32 -1
- msprobe/core/__init__.py +17 -0
- msprobe/core/common/const.py +120 -21
- msprobe/core/common/exceptions.py +2 -2
- msprobe/core/common/file_utils.py +279 -50
- msprobe/core/common/framework_adapter.py +169 -0
- msprobe/core/common/global_lock.py +86 -0
- msprobe/core/common/runtime.py +25 -0
- msprobe/core/common/utils.py +136 -45
- msprobe/core/common_config.py +7 -0
- msprobe/core/compare/acc_compare.py +646 -428
- msprobe/core/compare/check.py +36 -103
- msprobe/core/compare/compare_cli.py +4 -0
- msprobe/core/compare/config.py +72 -0
- msprobe/core/compare/highlight.py +215 -215
- msprobe/core/compare/layer_mapping/layer_mapping.py +2 -0
- msprobe/core/compare/merge_result/merge_result.py +4 -4
- msprobe/core/compare/multiprocessing_compute.py +223 -110
- msprobe/core/compare/npy_compare.py +2 -4
- msprobe/core/compare/utils.py +214 -244
- msprobe/core/config_check/__init__.py +17 -0
- msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
- msprobe/core/config_check/checkers/base_checker.py +60 -0
- msprobe/core/config_check/checkers/dataset_checker.py +138 -0
- msprobe/core/config_check/checkers/env_args_checker.py +96 -0
- msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
- msprobe/core/config_check/checkers/pip_checker.py +90 -0
- msprobe/core/config_check/checkers/random_checker.py +367 -0
- msprobe/core/config_check/checkers/weights_checker.py +147 -0
- msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
- msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
- msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
- msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
- msprobe/core/config_check/config_check_cli.py +51 -0
- msprobe/core/config_check/config_checker.py +100 -0
- msprobe/{mindspore/runtime.py → core/config_check/resource/dependency.yaml} +7 -4
- msprobe/core/config_check/resource/env.yaml +57 -0
- msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
- msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
- msprobe/core/config_check/utils/utils.py +107 -0
- msprobe/core/data_dump/api_registry.py +67 -4
- msprobe/core/data_dump/data_collector.py +170 -89
- msprobe/core/data_dump/data_processor/base.py +72 -51
- msprobe/core/data_dump/data_processor/mindspore_processor.py +109 -55
- msprobe/core/data_dump/data_processor/pytorch_processor.py +90 -82
- msprobe/core/data_dump/json_writer.py +143 -27
- msprobe/core/debugger/precision_debugger.py +144 -0
- msprobe/core/grad_probe/constant.py +1 -1
- msprobe/core/grad_probe/grad_compare.py +1 -1
- msprobe/core/grad_probe/utils.py +1 -1
- msprobe/core/hook_manager.py +242 -0
- msprobe/core/monitor/anomaly_processor.py +384 -0
- msprobe/core/service.py +357 -0
- msprobe/core/single_save/__init__.py +0 -0
- msprobe/core/single_save/single_comparator.py +243 -0
- msprobe/core/single_save/single_saver.py +146 -0
- msprobe/docs/01.installation.md +6 -5
- msprobe/docs/02.config_introduction.md +79 -22
- msprobe/docs/03.config_examples.md +1 -0
- msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
- msprobe/docs/05.data_dump_PyTorch.md +118 -49
- msprobe/docs/06.data_dump_MindSpore.md +167 -20
- msprobe/docs/07.accuracy_checker_PyTorch.md +2 -2
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +69 -9
- msprobe/docs/09.accuracy_checker_MindSpore.md +18 -6
- msprobe/docs/10.accuracy_compare_PyTorch.md +212 -74
- msprobe/docs/11.accuracy_compare_MindSpore.md +87 -37
- msprobe/docs/12.overflow_check_PyTorch.md +2 -2
- msprobe/docs/13.overflow_check_MindSpore.md +2 -2
- msprobe/docs/14.data_parse_PyTorch.md +3 -3
- msprobe/docs/17.grad_probe.md +2 -1
- msprobe/docs/18.online_dispatch.md +2 -2
- msprobe/docs/19.monitor.md +90 -44
- msprobe/docs/21.visualization_PyTorch.md +68 -15
- msprobe/docs/22.visualization_MindSpore.md +71 -18
- msprobe/docs/25.tool_function_introduction.md +23 -22
- msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
- msprobe/docs/27.dump_json_instruction.md +1 -1
- msprobe/docs/28.debugger_save_instruction.md +111 -20
- msprobe/docs/29.data_dump_MSAdapter.md +2 -2
- msprobe/docs/30.overflow_check_MSAdapter.md +2 -2
- msprobe/docs/31.config_check.md +95 -0
- msprobe/docs/32.ckpt_compare.md +69 -0
- msprobe/docs/33.generate_operator_MindSpore.md +181 -0
- msprobe/docs/34.RL_collect.md +92 -0
- msprobe/docs/35.nan_analyze.md +72 -0
- msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
- msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/save_compare_result_sample.png +0 -0
- msprobe/docs/img/visualization/proxy.png +0 -0
- msprobe/mindspore/__init__.py +1 -2
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +150 -58
- msprobe/mindspore/api_accuracy_checker/api_runner.py +7 -3
- msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +47 -69
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
- msprobe/mindspore/api_accuracy_checker/compute_element.py +0 -1
- msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -2
- msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +460 -0
- msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +9 -0
- msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
- msprobe/mindspore/cell_processor.py +204 -33
- msprobe/mindspore/code_mapping/graph_parser.py +4 -21
- msprobe/mindspore/common/const.py +17 -7
- msprobe/mindspore/common/utils.py +128 -11
- msprobe/mindspore/compare/common_dir_compare.py +382 -0
- msprobe/mindspore/compare/distributed_compare.py +2 -26
- msprobe/mindspore/compare/ms_compare.py +17 -405
- msprobe/mindspore/compare/ms_graph_compare.py +14 -5
- msprobe/mindspore/compare/utils.py +37 -0
- msprobe/mindspore/debugger/debugger_config.py +53 -3
- msprobe/mindspore/debugger/precision_debugger.py +72 -91
- msprobe/mindspore/dump/cell_dump_process.py +877 -0
- msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +864 -0
- msprobe/mindspore/dump/dump_tool_factory.py +13 -5
- msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
- msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
- msprobe/mindspore/dump/hook_cell/api_register.py +40 -6
- msprobe/mindspore/dump/hook_cell/hook_cell.py +18 -7
- msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +18 -0
- msprobe/mindspore/dump/jit_dump.py +21 -18
- msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -15
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +12 -6
- msprobe/mindspore/free_benchmark/common/utils.py +1 -1
- msprobe/mindspore/grad_probe/global_context.py +7 -2
- msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
- msprobe/mindspore/mindspore_service.py +114 -0
- msprobe/mindspore/monitor/common_func.py +52 -0
- msprobe/mindspore/monitor/data_writers.py +237 -0
- msprobe/mindspore/monitor/features.py +20 -7
- msprobe/mindspore/monitor/module_hook.py +281 -209
- msprobe/mindspore/monitor/optimizer_collect.py +334 -0
- msprobe/mindspore/monitor/utils.py +25 -5
- msprobe/mindspore/ms_config.py +16 -15
- msprobe/mindspore/task_handler_factory.py +5 -2
- msprobe/msprobe.py +19 -0
- msprobe/nan_analyze/__init__.py +14 -0
- msprobe/nan_analyze/analyzer.py +255 -0
- msprobe/nan_analyze/graph.py +189 -0
- msprobe/nan_analyze/utils.py +211 -0
- msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +20 -20
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +4 -7
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +204 -2
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +12 -11
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +1 -0
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +8 -5
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +2 -3
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +156 -0
- msprobe/pytorch/attl_manager.py +65 -0
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
- msprobe/pytorch/common/utils.py +26 -14
- msprobe/pytorch/compare/distributed_compare.py +4 -36
- msprobe/pytorch/compare/pt_compare.py +13 -84
- msprobe/pytorch/compare/utils.py +47 -0
- msprobe/pytorch/debugger/debugger_config.py +34 -17
- msprobe/pytorch/debugger/precision_debugger.py +66 -118
- msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
- msprobe/pytorch/dump/module_dump/module_dump.py +11 -58
- msprobe/pytorch/dump/module_dump/module_processer.py +143 -113
- msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
- msprobe/pytorch/hook_module/api_register.py +29 -5
- msprobe/pytorch/hook_module/hook_module.py +9 -18
- msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
- msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +22 -1
- msprobe/pytorch/hook_module/utils.py +28 -2
- msprobe/pytorch/monitor/csv2tb.py +6 -2
- msprobe/pytorch/monitor/data_writers.py +259 -0
- msprobe/pytorch/monitor/module_hook.py +227 -158
- msprobe/pytorch/monitor/module_metric.py +14 -0
- msprobe/pytorch/monitor/optimizer_collect.py +242 -270
- msprobe/pytorch/monitor/utils.py +16 -3
- msprobe/pytorch/online_dispatch/dispatch.py +4 -2
- msprobe/pytorch/online_dispatch/dump_compare.py +5 -2
- msprobe/pytorch/parse_tool/lib/utils.py +3 -3
- msprobe/pytorch/pt_config.py +8 -7
- msprobe/pytorch/pytorch_service.py +73 -0
- msprobe/visualization/builder/graph_builder.py +33 -13
- msprobe/visualization/builder/msprobe_adapter.py +24 -11
- msprobe/visualization/compare/graph_comparator.py +53 -45
- msprobe/visualization/compare/mode_adapter.py +31 -1
- msprobe/visualization/graph/base_node.py +3 -3
- msprobe/visualization/graph/graph.py +2 -2
- msprobe/visualization/graph_service.py +250 -103
- msprobe/visualization/utils.py +27 -11
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -106
- msprobe/mindspore/monitor/anomaly_detect.py +0 -404
- msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
- msprobe/mindspore/service.py +0 -549
- msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
- msprobe/pytorch/monitor/anomaly_detect.py +0 -410
- msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
- msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
- msprobe/pytorch/service.py +0 -473
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/WHEEL +0 -0
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/top_level.txt +0 -0
- /msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
- /msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
- /msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0
```diff
@@ -40,6 +40,7 @@ from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import check_dty
     DETAIL_TEST_ROWS, BENCHMARK_COMPARE_SUPPORT_LIST
 from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_segments
 from msprobe.pytorch.common.log import logger
+from msprobe.core.common.decorator import recursion_depth_decorator


 ResultInfo = namedtuple('ResultInfo', ['full_api_name', 'fwd_success_status', 'bwd_success_status',
```
```diff
@@ -178,6 +179,41 @@ class Comparator:
         if not os.path.exists(detail_save_path):
             write_csv(DETAIL_TEST_ROWS, detail_save_path)

+    @recursion_depth_decorator("compare_core")
+    def _compare_core(self, api_name, bench_output, device_output):
+        compare_column = CompareColumn()
+        if not isinstance(bench_output, type(device_output)):
+            status = CompareConst.ERROR
+            message = "bench and npu output type is different."
+        elif isinstance(bench_output, dict):
+            b_keys, n_keys = set(bench_output.keys()), set(device_output.keys())
+            if b_keys != n_keys:
+                status = CompareConst.ERROR
+                message = "bench and npu output dict keys are different."
+            else:
+                status, compare_column, message = self._compare_core(api_name, list(bench_output.values()),
+                                                                     list(device_output.values()))
+        elif isinstance(bench_output, torch.Tensor):
+            copy_bench_out = bench_output.detach().clone()
+            copy_device_output = device_output.detach().clone()
+            compare_column.bench_type = str(copy_bench_out.dtype)
+            compare_column.npu_type = str(copy_device_output.dtype)
+            compare_column.shape = tuple(device_output.shape)
+            status, compare_column, message = self._compare_torch_tensor(api_name, copy_bench_out, copy_device_output,
+                                                                         compare_column)
+        elif isinstance(bench_output, (bool, int, float, str)):
+            compare_column.bench_type = str(type(bench_output))
+            compare_column.npu_type = str(type(device_output))
+            status, compare_column, message = self._compare_builtin_type(bench_output, device_output, compare_column)
+        elif bench_output is None:
+            status = CompareConst.SKIP
+            message = "Bench output is None, skip this test."
+        else:
+            status = CompareConst.ERROR
+            message = "Unexpected output type in compare_core: {}".format(type(bench_output))
+
+        return status, compare_column, message
+
     def write_summary_csv(self, test_result):
         test_rows = []
         try:
```
```diff
@@ -293,40 +329,6 @@ class Comparator:
             test_final_success = CompareConst.WARNING
         return test_final_success, detailed_result_total

-    def _compare_core(self, api_name, bench_output, device_output):
-        compare_column = CompareColumn()
-        if not isinstance(bench_output, type(device_output)):
-            status = CompareConst.ERROR
-            message = "bench and npu output type is different."
-        elif isinstance(bench_output, dict):
-            b_keys, n_keys = set(bench_output.keys()), set(device_output.keys())
-            if b_keys != n_keys:
-                status = CompareConst.ERROR
-                message = "bench and npu output dict keys are different."
-            else:
-                status, compare_column, message = self._compare_core(api_name, list(bench_output.values()),
-                                                                     list(device_output.values()))
-        elif isinstance(bench_output, torch.Tensor):
-            copy_bench_out = bench_output.detach().clone()
-            copy_device_output = device_output.detach().clone()
-            compare_column.bench_type = str(copy_bench_out.dtype)
-            compare_column.npu_type = str(copy_device_output.dtype)
-            compare_column.shape = tuple(device_output.shape)
-            status, compare_column, message = self._compare_torch_tensor(api_name, copy_bench_out, copy_device_output,
-                                                                         compare_column)
-        elif isinstance(bench_output, (bool, int, float, str)):
-            compare_column.bench_type = str(type(bench_output))
-            compare_column.npu_type = str(type(device_output))
-            status, compare_column, message = self._compare_builtin_type(bench_output, device_output, compare_column)
-        elif bench_output is None:
-            status = CompareConst.SKIP
-            message = "Bench output is None, skip this test."
-        else:
-            status = CompareConst.ERROR
-            message = "Unexpected output type in compare_core: {}".format(type(bench_output))
-
-        return status, compare_column, message
-
     def _compare_torch_tensor(self, api_name, bench_output, device_output, compare_column):
         cpu_shape = bench_output.shape
         npu_shape = device_output.shape
```
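Note: the relocated `_compare_core` above now carries `@recursion_depth_decorator("compare_core")` because it recurses into nested dict outputs. As a rough, hypothetical sketch only (the real decorator ships in `msprobe.core.common.decorator`; its limit and error handling may differ), such a guard can look like this:

```python
import functools

def recursion_depth_decorator(label, max_depth=50):
    """Hypothetical sketch: abort cleanly when a recursive call nests too deeply."""
    def wrapper(func):
        depth = {"value": 0}

        @functools.wraps(func)
        def inner(*args, **kwargs):
            depth["value"] += 1
            try:
                if depth["value"] > max_depth:
                    raise RecursionError(f"{label}: recursion depth exceeded {max_depth}")
                return func(*args, **kwargs)
            finally:
                depth["value"] -= 1
        return inner
    return wrapper
```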
```diff
@@ -73,27 +73,27 @@ DETAIL_TEST_ROWS = [


 precision_configs = {
-    torch.float16
-        'small_value'
+    torch.float16: {
+        'small_value': [
             1e-3
         ],
-        'small_value_atol'
+        'small_value_atol': [
             1e-5
         ]
     },
     torch.bfloat16: {
-        'small_value'
+        'small_value': [
             1e-3
         ],
-        'small_value_atol'
+        'small_value_atol': [
             1e-5
         ]
     },
-    torch.float32:{
-        'small_value'
+    torch.float32: {
+        'small_value': [
             1e-6
         ],
-        'small_value_atol'
+        'small_value_atol': [
             1e-9
         ]
     }
```
```diff
@@ -101,33 +101,33 @@ precision_configs = {


 ULP_PARAMETERS = {
-    torch.float16
-        'min_eb'
+    torch.float16: {
+        'min_eb': [
             -14
         ],
-        'exponent_num'
+        'exponent_num': [
             10
         ]
     },
-    torch.bfloat16
-        'min_eb'
+    torch.bfloat16: {
+        'min_eb': [
             -126
         ],
-        'exponent_num'
+        'exponent_num': [
             7
         ]
     },
-    torch.float32
-        'min_eb'
+    torch.float32: {
+        'min_eb': [
             -126
         ],
-        'exponent_num'
+        'exponent_num': [
             23
         ]
     }
 }
-
-
+
+
 class ApiPrecisionCompareColumn:
     API_NAME = 'API Name'
     DEVICE_DTYPE = 'DEVICE Dtype'
```
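For reference, `min_eb` is the smallest normal exponent of each dtype and `exponent_num` its number of mantissa bits. A hedged sketch of how such parameters are commonly combined into a per-element ULP error (the checker's actual formula may differ):

```python
import torch

def ulp_error(device_out, bench_out, min_eb=-14, exponent_num=10):
    """Rough illustration for float16: one ULP of x is 2 ** (exponent(x) - exponent_num)."""
    bench64 = bench_out.to(torch.float64)
    # Exponent of the benchmark value, clamped at the smallest normal exponent.
    eb = torch.clamp(torch.floor(torch.log2(torch.abs(bench64))), min=float(min_eb))
    ulp = torch.pow(2.0, eb - exponent_num)
    return torch.abs(device_out.to(torch.float64) - bench64) / ulp
```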
```diff
@@ -202,7 +202,7 @@ class ApiPrecisionCompareColumn:


 CompareMessage = {
-    "topk"
+    "topk": "在npu上，topk的入参sorted=False时不生效，会返回有序tensor，而cpu上会返回无序tensor。 如果topk精度不达标，请检查是否是该原因导致的。"
 }


```
```diff
@@ -411,19 +411,16 @@ class OperatorScriptGenerator:
     return kwargs_dict_generator


-
 def _op_generator_parser(parser):
-    parser.add_argument("-i", "--config_input", dest="config_input",
-                        help="<
+    parser.add_argument("-i", "--config_input", dest="config_input", type=str,
+                        help="<Required> Path of config json file", required=True)
     parser.add_argument("-o", "--api_output_path", dest="api_output_path", type=str,
-                        help="<Required> Path of extract api_name.json.",
-                        required=True)
+                        help="<Required> Path of extract api_name.json.", required=True)


 def parse_json_config(json_file_path):
     if not json_file_path:
-
-        json_file_path = os.path.join(config_dir, "config.json")
+        raise Exception("config_input path can not be empty, please check.")
     json_config = load_json(json_file_path)
     common_config = CommonConfig(json_config)
     return common_config
```
```diff
@@ -1,6 +1,6 @@
-import json
 import os
-import
+import re
+import stat
 from enum import Enum, auto
 import torch
 try:
```
```diff
@@ -25,6 +25,31 @@ RAISE_PRECISION = {{
 }}
 THOUSANDTH_THRESHOLDING = 0.001
 BACKWARD = 'backward'
+DIR = "dir"
+FILE = "file"
+READ_ABLE = "read"
+WRITE_ABLE = "write"
+READ_WRITE_ABLE = "read and write"
+DIRECTORY_LENGTH = 4096
+FILE_NAME_LENGTH = 255
+SOFT_LINK_ERROR = "检测到软链接"
+FILE_PERMISSION_ERROR = "文件权限错误"
+INVALID_FILE_ERROR = "无效文件"
+ILLEGAL_PATH_ERROR = "非法文件路径"
+ILLEGAL_PARAM_ERROR = "非法打开方式"
+FILE_TOO_LARGE_ERROR = "文件过大"
+FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$"
+FILE_SIZE_DICT = {{
+    ".pkl": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".npy": 10737418240,  # 10 * 1024 * 1024 * 1024
+    ".json": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".pt": 10737418240,  # 10 * 1024 * 1024 * 1024
+    ".csv": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".xlsx": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".yaml": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".ir": 1073741824  # 1 * 1024 * 1024 * 1024
+}}
+COMMOM_FILE_SIZE = 1048576  # 1 * 1024 * 1024

 class CompareStandard(Enum):
     BINARY_EQUALITY_STANDARD = auto()
```
```diff
@@ -33,8 +58,184 @@ class CompareStandard(Enum):
     BENCHMARK_STANDARD = auto()
     THOUSANDTH_STANDARD = auto()

+class FileChecker:
+    """
+    The class for check file.
+
+    Attributes:
+        file_path: The file or dictionary path to be verified.
+        path_type: file or dictionary
+        ability(str): FileCheckConst.WRITE_ABLE or FileCheckConst.READ_ABLE to set file has writability or readability
+        file_type(str): The correct file type for file
+    """
+
+    def __init__(self, file_path, path_type, ability=None, file_type=None, is_script=True):
+        self.file_path = file_path
+        self.path_type = self._check_path_type(path_type)
+        self.ability = ability
+        self.file_type = file_type
+        self.is_script = is_script
+
+    @staticmethod
+    def _check_path_type(path_type):
+        if path_type not in [DIR, FILE]:
+            print(f'ERROR: The path_type must be {{DIR}} or {{FILE}}.')
+            raise Exception(ILLEGAL_PARAM_ERROR)
+        return path_type
+
+    def common_check(self):
+        """
+        功能：用户校验基本文件权限：软连接、文件长度、是否存在、读写权限、文件属组、文件特殊字符
+        注意：文件后缀的合法性，非通用操作，可使用其他独立接口实现
+        """
+        FileChecker.check_path_exists(self.file_path)
+        FileChecker.check_link(self.file_path)
+        self.file_path = os.path.realpath(self.file_path)
+        FileChecker.check_path_length(self.file_path)
+        FileChecker.check_path_type(self.file_path, self.path_type)
+        self.check_path_ability()
+        if self.is_script:
+            FileChecker.check_path_owner_consistent(self.file_path)
+        FileChecker.check_path_pattern_valid(self.file_path)
+        FileChecker.check_common_file_size(self.file_path)
+        FileChecker.check_file_suffix(self.file_path, self.file_type)
+        if self.path_type == FILE:
+            FileChecker.check_dirpath_before_read(self.file_path)
+        return self.file_path
+
+    def check_path_ability(self):
+        if self.ability == WRITE_ABLE:
+            FileChecker.check_path_writability(self.file_path)
+        if self.ability == READ_ABLE:
+            FileChecker.check_path_readability(self.file_path)
+        if self.ability == READ_WRITE_ABLE:
+            FileChecker.check_path_readability(self.file_path)
+            FileChecker.check_path_writability(self.file_path)
+
+    @staticmethod
+    def check_path_exists(path):
+        if not os.path.exists(path):
+            print(f'ERROR: The file path %s does not exist.' % path)
+            raise Exception()
+
+    @staticmethod
+    def check_link(path):
+        abs_path = os.path.abspath(path)
+        if os.path.islink(abs_path):
+            print('ERROR: The file path {{}} is a soft link.'.format(path))
+            raise Exception(SOFT_LINK_ERROR)
+
+    @staticmethod
+    def check_path_length(path, name_length=None):
+        file_max_name_length = name_length if name_length else FILE_NAME_LENGTH
+        if len(path) > DIRECTORY_LENGTH or \
+                len(os.path.basename(path)) > file_max_name_length:
+            print(f'ERROR: The file path length exceeds limit.')
+            raise Exception(ILLEGAL_PATH_ERROR)
+
+    @staticmethod
+    def check_path_type(file_path, file_type):
+        if file_type == FILE:
+            if not os.path.isfile(file_path):
+                print(f"ERROR: The {{file_path}} should be a file!")
+                raise Exception(INVALID_FILE_ERROR)
+        if file_type == DIR:
+            if not os.path.isdir(file_path):
+                print(f"ERROR: The {{file_path}} should be a dictionary!")
+                raise Exception(INVALID_FILE_ERROR)
+
+    @staticmethod
+    def check_path_owner_consistent(path):
+        file_owner = os.stat(path).st_uid
+        if file_owner != os.getuid() and os.getuid() != 0:
+            print('ERROR: The file path %s may be insecure because is does not belong to you.' % path)
+            raise Exception(FILE_PERMISSION_ERROR)
+
+    @staticmethod
+    def check_path_pattern_valid(path):
+        if not re.match(FILE_VALID_PATTERN, path):
+            print('ERROR: The file path %s contains special characters.' % (path))
+            raise Exception(ILLEGAL_PATH_ERROR)
+
+    @staticmethod
+    def check_common_file_size(file_path):
+        if os.path.isfile(file_path):
+            for suffix, max_size in FILE_SIZE_DICT.items():
+                if file_path.endswith(suffix):
+                    FileChecker.check_file_size(file_path, max_size)
+                    return
+            FileChecker.check_file_size(file_path, COMMOM_FILE_SIZE)
+
+    @staticmethod
+    def check_file_size(file_path, max_size):
+        try:
+            file_size = os.path.getsize(file_path)
+        except OSError as os_error:
+            print(f'ERROR: Failed to open "{{file_path}}". {{str(os_error)}}')
+            raise Exception(INVALID_FILE_ERROR) from os_error
+        if file_size >= max_size:
+            print(f'ERROR: The size ({{file_size}}) of {{file_path}} exceeds ({{max_size}}) bytes, tools not support.')
+            raise Exception(FILE_TOO_LARGE_ERROR)
+
+    @staticmethod
+    def check_file_suffix(file_path, file_suffix):
+        if file_suffix:
+            if not file_path.endswith(file_suffix):
+                print(f"The {{file_path}} should be a {{file_suffix}} file!")
+                raise Exception(INVALID_FILE_ERROR)
+
+    @staticmethod
+    def check_dirpath_before_read(path):
+        path = os.path.realpath(path)
+        dirpath = os.path.dirname(path)
+        if FileChecker.check_others_writable(dirpath):
+            print(f"WARNING: The directory is writable by others: {{dirpath}}.")
+        try:
+            FileChecker.check_path_owner_consistent(dirpath)
+        except Exception:
+            print(f"WARNING: The directory {{dirpath}} is not yours.")
+
+    @staticmethod
+    def check_others_writable(directory):
+        dir_stat = os.stat(directory)
+        is_writable = (
+            bool(dir_stat.st_mode & stat.S_IWGRP) or  # 组可写
+            bool(dir_stat.st_mode & stat.S_IWOTH)  # 其他用户可写
+        )
+        return is_writable
+
+    @staticmethod
+    def check_path_readability(path):
+        if not os.access(path, os.R_OK):
+            print('ERROR: The file path %s is not readable.' % path)
+            raise Exception(FILE_PERMISSION_ERROR)
+
+    @staticmethod
+    def check_path_writability(path):
+        if not os.access(path, os.W_OK):
+            print('ERROR: The file path %s is not writable.' % path)
+            raise Exception(FILE_PERMISSION_ERROR)
+
+
+def check_file_or_directory_path(path, isdir=False):
+    """
+    Function Description:
+        check whether the path is valid
+    Parameter:
+        path: the path to check
+        isdir: the path is dir or file
+    Exception Description:
+        when invalid data throw exception
+    """
+    if isdir:
+        path_checker = FileChecker(path, DIR, WRITE_ABLE)
+    else:
+        path_checker = FileChecker(path, FILE, READ_ABLE)
+    path_checker.common_check()
+
+
 def load_pt(pt_path, to_cpu=False):
     pt_path = os.path.realpath(pt_path)
+    check_file_or_directory_path(pt_path)
     try:
         if to_cpu:
             pt = torch.load(pt_path, map_location=torch.device("cpu"), weights_only=True)
```
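A brief usage sketch of the path validation added above to the generated script (the paths are illustrative only):

```python
# Readable-file check, as load_pt now does before calling torch.load:
check_file_or_directory_path("./dump_data/Torch.matmul.0.forward.input.0.pt")

# Writable-directory check:
check_file_or_directory_path("./op_output", isdir=True)

# Enforce a suffix explicitly via FileChecker:
real_path = FileChecker("./config.json", FILE, READ_ABLE, file_type=".json").common_check()
```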
```diff
@@ -202,6 +403,7 @@ def compare_tensor(out_device, out_bench, api_name):
     else:
         abs_err = torch.abs(out_device - out_bench)
         abs_bench = torch.abs(out_bench)
+        eps = 2 ** -23
         if dtype_bench == torch.float32:
             eps = 2 ** -23
         if dtype_bench == torch.float64:
```
```diff
@@ -70,7 +70,7 @@ def split_json_file(input_file, num_splits, filter_api):
         split_forward_data = dict(items[start:end])
         temp_data = {
             **input_data,
-            "data":{
+            "data": {
                 **split_forward_data,
                 **backward_data
             }
```
```diff
@@ -87,10 +87,6 @@ def signal_handler(signum, frame):
     raise KeyboardInterrupt()


-signal.signal(signal.SIGINT, signal_handler)
-signal.signal(signal.SIGTERM, signal_handler)
-
-
 ParallelUTConfig = namedtuple('ParallelUTConfig', ['api_files', 'out_path', 'num_splits',
                                                    'save_error_data_flag', 'jit_compile_flag', 'device_id',
                                                    'result_csv_path', 'total_items', 'config_path'])
```
```diff
@@ -132,6 +128,9 @@ def run_parallel_ut(config):
                 sys.stdout.flush()
         except ValueError as e:
             logger.warning(f"An error occurred while reading subprocess output: {e}")
+        finally:
+            if process.poll() is None:
+                process.stdout.close()

     def update_progress_bar(progress_bar, result_csv_path):
         while any(process.poll() is None for process in processes):
```
```diff
@@ -142,7 +141,7 @@ def run_parallel_ut(config):

     for api_info in config.api_files:
         cmd = create_cmd(api_info, next(device_id_cycle))
-        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
+        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
                                    text=True, bufsize=1, shell=False)
         processes.append(process)
         threading.Thread(target=read_process_output, args=(process,), daemon=True).start()
```
```diff
@@ -188,8 +187,8 @@ def run_parallel_ut(config):


 def prepare_config(args):
-    api_info_file_checker = FileChecker(file_path=args.api_info_file, path_type=FileCheckConst.FILE,
-
+    api_info_file_checker = FileChecker(file_path=args.api_info_file, path_type=FileCheckConst.FILE,
+                                        ability=FileCheckConst.READ_ABLE, file_type=FileCheckConst.JSON_SUFFIX)
     api_info = api_info_file_checker.common_check()
     out_path = args.out_path if args.out_path else Const.DEFAULT_PATH
     create_directory(out_path)
```
```diff
@@ -198,11 +197,11 @@ def prepare_config(args):
     split_files, total_items = split_json_file(api_info, args.num_splits, args.filter_api)
     config_path = args.config_path if args.config_path else None
     if config_path:
-        config_path_checker = FileChecker(config_path, FileCheckConst.FILE,
+        config_path_checker = FileChecker(config_path, FileCheckConst.FILE,
                                           FileCheckConst.READ_ABLE, FileCheckConst.JSON_SUFFIX)
         config_path = config_path_checker.common_check()
     result_csv_path = args.result_csv_path or os.path.join(
-
+        out_path, f"accuracy_checking_result_{time.strftime('%Y%m%d%H%M%S')}.csv")
     if not args.result_csv_path:
         details_csv_path = os.path.join(out_path, f"accuracy_checking_details_{time.strftime('%Y%m%d%H%M%S')}.csv")
         comparator = Comparator(result_csv_path, details_csv_path, False)
```
```diff
@@ -217,9 +216,11 @@ def prepare_config(args):


 def main():
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
     parser = argparse.ArgumentParser(description='Run UT in parallel')
     _run_ut_parser(parser)
-    parser.add_argument('-n', '--num_splits', type=int, choices=range(1, 65), default=8,
+    parser.add_argument('-n', '--num_splits', type=int, choices=range(1, 65), default=8,
                         help='Number of splits for parallel processing. Range: 1-64')
     args = parser.parse_args()
     config = prepare_config(args)
```
```diff
@@ -45,7 +45,7 @@ from msprobe.pytorch.api_accuracy_checker.compare.compare_column import CompareC
 from msprobe.pytorch.api_accuracy_checker.common.config import CheckerConfig
 from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward
 from msprobe.core.common.file_utils import FileChecker, change_mode, \
-    create_directory, get_json_contents, read_csv, check_file_or_directory_path
+    create_directory, get_json_contents, read_csv, check_file_or_directory_path
 from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.pt_config import parse_json_config
 from msprobe.core.common.const import Const, FileCheckConst, CompareConst
```
```diff
@@ -65,7 +65,8 @@ DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv"

 not_backward_list = ['repeat_interleave']
 unsupported_backward_list = ['masked_select']
-unsupported_api_list = ["to"
+unsupported_api_list = ["to", "empty", "empty_like", "empty_strided", "new_empty", "new_empty_strided",
+                        "empty_with_format"]


 tqdm_params = {
```
```diff
@@ -482,7 +483,6 @@ def _run_ut(parser=None):
     run_ut_command(args)


-
 def checked_online_config(online_config):
     if not online_config.is_online:
         return
```
```diff
@@ -503,8 +503,10 @@ def checked_online_config(online_config):
         check_file_or_directory_path(online_config.tls_path, isdir=True)
         check_file_or_directory_path(os.path.join(online_config.tls_path, "server.key"))
         check_file_or_directory_path(os.path.join(online_config.tls_path, "server.crt"))
-
-
+        check_file_or_directory_path(os.path.join(online_config.tls_path, "ca.crt"))
+        crl_path = os.path.join(online_config.tls_path, "crl.pem")
+        if os.path.exists(crl_path):
+            check_file_or_directory_path(crl_path)

     # host and port
     if not isinstance(online_config.host, str) or not re.match(Const.ipv4_pattern, online_config.host):
```
```diff
@@ -582,6 +584,7 @@ def run_ut_command(args):
         if len(parts_by_underscore) < 2:
             raise ValueError("File name part does not contain enough '_' separated segments.")
         time_info = parts_by_underscore[-1]
+
         global UT_ERROR_DATA_DIR
         UT_ERROR_DATA_DIR = 'ut_error_data' + time_info
         error_data_path = initialize_save_error_data(error_data_path)
```
```diff
@@ -124,8 +124,6 @@ def exec_api(exec_params):
     api_register.initialize_hook(None)
     api_func_type = list(prefix_map.keys())[list(prefix_map.values()).index(api_type)]
     api_func = api_register.ori_api_attr.get(Const.PT_FRAMEWORK + Const.SEP + api_func_type, {}).get(api_name)
-    if api_func is None:
-        return out

     torch_api = ApiTemplate(api_name, api_func, api_type, None, need_hook=False, device=device)
     if is_autocast:
```
```diff
@@ -257,7 +255,8 @@ def record_skip_info(api_full_name, compare, compare_alg_results):

 def is_unsupported_api(api_name, is_overflow_check=False):
     split_name = api_name.split(Const.SEP)[0]
-
+    unsupport_type_list = [Const.DISTRIBUTED, Const.MINDSPEED_API_TYPE_PREFIX]
+    flag = (split_name in unsupport_type_list) or (is_overflow_check and split_name == Const.NPU)
     if flag:
         logger.info(f"{split_name} api is not supported for run ut. SKIP.")
     return flag
```
```diff
@@ -12,23 +12,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import
+from functools import partial
+import zlib
 import io
 import struct
 import time
 import os
-import signal
 from queue import Queue
 from threading import Thread
 from typing import Union

-from twisted.internet import reactor, protocol, endpoints
+from twisted.internet import reactor, protocol, endpoints, ssl
 from twisted.protocols.basic import FileSender

 from msprobe.pytorch.common.utils import logger
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.utils import STRUCT_UNPACK_MODE as unpack_mode, \
-    STR_TO_BYTES_ORDER as bytes_order
+    STR_TO_BYTES_ORDER as bytes_order, cipher_list, verify_callback, load_ssl_pem

 MAX_SENDING_QUEUE_SIZE = 20

```
```diff
@@ -104,11 +103,28 @@ class TCPClient:
         self.factory = MessageClientFactory()
         self.factory.protocol = cur_protocol
         if self.tls_path:
-
-
-
-
-
+            client_key, client_crt, ca_crt, crl_pem = load_ssl_pem(
+                key_file=os.path.join(self.tls_path, "client.key"),
+                cert_file=os.path.join(self.tls_path, "client.crt"),
+                ca_file=os.path.join(self.tls_path, "ca.crt"),
+                crl_file=os.path.join(self.tls_path, "crl.pem")
+            )
+
+            ssl_options = ssl.CertificateOptions(
+                privateKey=client_key,
+                certificate=client_crt,
+                method=ssl.SSL.TLSv1_2_METHOD,
+                verify=True,
+                requireCertificate=True,
+                caCerts=[ca_crt],  # 信任的CA证书列表
+            )
+            ssl_context = ssl_options.getContext()
+            ssl_context.set_cipher_list(cipher_list)
+            ssl_context.set_options(ssl.SSL.OP_NO_RENEGOTIATION)
+            ssl_context.set_verify(ssl.SSL.VERIFY_PEER | ssl.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
+                                   partial(verify_callback, crl=crl_pem))
+
+            endpoint = endpoints.SSL4ClientEndpoint(reactor, self.host, self.port, ssl_options)
         else:
             endpoint = endpoints.TCP4ClientEndpoint(reactor, self.host, self.port)
         d = endpoint.connect(self.factory)
```
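`load_ssl_pem`, `cipher_list`, and `verify_callback` come from the new `tensor_transport_layer/utils.py` (+156 lines, not shown in this hunk). As a speculative sketch only, a CRL-aware callback compatible with the `partial(verify_callback, crl=crl_pem)` call above could look roughly like this, assuming `crl` is an `OpenSSL.crypto.CRL` or `None` (the shipped implementation may differ):

```python
def verify_callback(connection, x509, errnum, errdepth, preverify_ok, crl=None):
    """Speculative sketch: reject peers whose certificate appears on the CRL."""
    if not preverify_ok:
        return False  # base OpenSSL chain verification already failed
    if crl is not None:
        revoked = crl.get_revoked() or ()
        peer_serial = x509.get_serial_number()
        if any(int(entry.get_serial(), 16) == peer_serial for entry in revoked):
            return False  # peer certificate has been revoked
    return True
```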
```diff
@@ -299,12 +315,12 @@ class ClientProtocol(protocol.Protocol):

     def send_wrapped_data(self, data, sequence_number: int = 0, rank: int = 0, step: int = 0):
         length = len(data)
-
+        data_crc = f"{zlib.crc32(data):08x}" if self.check_sum else ""
         data_meaasge = length.to_bytes(8, byteorder=bytes_order) + \
                        sequence_number.to_bytes(8, byteorder=bytes_order) + \
                        rank.to_bytes(8, byteorder=bytes_order) + \
                        step.to_bytes(8, byteorder=bytes_order) + \
-
+                       data_crc.encode() + \
                        data
         logger.debug(f"send 流水号: {sequence_number}; RANK: {rank}; STEP: {step}; LENGTH: {length}")

```
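The optional CRC32 is inserted between the four 8-byte header fields and the payload. A hedged sketch of how a receiver could unpack such a frame (the byte order is an assumption here; the real code takes it from `STR_TO_BYTES_ORDER`, and the server side may differ):

```python
import zlib

def unpack_frame(message: bytes, check_sum: bool = True, byteorder: str = "big"):
    """Illustrative only: mirrors send_wrapped_data's length/seq/rank/step/crc/payload layout."""
    length = int.from_bytes(message[0:8], byteorder)
    sequence_number = int.from_bytes(message[8:16], byteorder)
    rank = int.from_bytes(message[16:24], byteorder)
    step = int.from_bytes(message[24:32], byteorder)
    crc_len = 8 if check_sum else 0
    data_crc = message[32:32 + crc_len].decode()
    data = message[32 + crc_len:32 + crc_len + length]
    if check_sum and f"{zlib.crc32(data):08x}" != data_crc:
        raise ValueError(f"CRC mismatch for sequence {sequence_number} (rank {rank}, step {step})")
    return sequence_number, rank, step, data
```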
```diff
@@ -346,7 +362,7 @@ class ClientProtocol(protocol.Protocol):
     def connectionLost(self, reason):
         self.signal_exit = True
         self.factory.num_connections -= 1
-        logger.info(f"Lost connection with server, reason is : {reason}")
+        logger.info(f"Lost connection with server, reason is : {reason.value}")


 class MessageClientFactory(protocol.ClientFactory):
```