PyPI - mindstudio-probe - Versions diffs - 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl - Mend

mindstudio-probe: 1.2.2 (py3-none-any.whl) → 8.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (261)

{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/METADATA +4 -3
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/RECORD +243 -191
msprobe/README.md +57 -21
msprobe/core/__init__.py +17 -0
msprobe/core/common/const.py +224 -82
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +5 -3
msprobe/core/common/file_utils.py +274 -40
msprobe/core/common/framework_adapter.py +169 -0
msprobe/core/common/global_lock.py +86 -0
msprobe/core/common/runtime.py +25 -0
msprobe/core/common/utils.py +148 -72
msprobe/core/common_config.py +7 -0
msprobe/core/compare/acc_compare.py +640 -462
msprobe/core/compare/check.py +36 -107
msprobe/core/compare/compare_cli.py +4 -0
msprobe/core/compare/config.py +72 -0
msprobe/core/compare/highlight.py +217 -215
msprobe/core/compare/layer_mapping/layer_mapping.py +4 -1
msprobe/core/compare/merge_result/merge_result.py +12 -6
msprobe/core/compare/multiprocessing_compute.py +227 -107
msprobe/core/compare/npy_compare.py +32 -16
msprobe/core/compare/utils.py +218 -244
msprobe/{mindspore/runtime.py → core/config_check/__init__.py} +2 -4
msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
msprobe/core/config_check/checkers/base_checker.py +60 -0
msprobe/core/config_check/checkers/dataset_checker.py +138 -0
msprobe/core/config_check/checkers/env_args_checker.py +96 -0
msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
msprobe/core/config_check/checkers/pip_checker.py +90 -0
msprobe/core/config_check/checkers/random_checker.py +367 -0
msprobe/core/config_check/checkers/weights_checker.py +147 -0
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
msprobe/core/config_check/config_check_cli.py +51 -0
msprobe/core/config_check/config_checker.py +100 -0
msprobe/{pytorch/parse.py → core/config_check/resource/dependency.yaml} +7 -4
msprobe/core/config_check/resource/env.yaml +57 -0
msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
msprobe/core/config_check/utils/utils.py +107 -0
msprobe/core/data_dump/api_registry.py +239 -0
msprobe/core/data_dump/data_collector.py +36 -9
msprobe/core/data_dump/data_processor/base.py +74 -53
msprobe/core/data_dump/data_processor/mindspore_processor.py +119 -78
msprobe/core/data_dump/data_processor/pytorch_processor.py +134 -96
msprobe/core/data_dump/json_writer.py +146 -57
msprobe/core/debugger/precision_debugger.py +143 -0
msprobe/core/grad_probe/constant.py +2 -1
msprobe/core/grad_probe/grad_compare.py +2 -2
msprobe/core/grad_probe/utils.py +1 -1
msprobe/core/hook_manager.py +242 -0
msprobe/core/monitor/anomaly_processor.py +384 -0
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/core/service.py +356 -0
msprobe/core/single_save/__init__.py +0 -0
msprobe/core/single_save/single_comparator.py +243 -0
msprobe/core/single_save/single_saver.py +157 -0
msprobe/docs/01.installation.md +6 -5
msprobe/docs/02.config_introduction.md +89 -30
msprobe/docs/03.config_examples.md +1 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +184 -50
msprobe/docs/06.data_dump_MindSpore.md +193 -28
msprobe/docs/07.accuracy_checker_PyTorch.md +13 -3
msprobe/docs/08.accuracy_checker_online_PyTorch.md +72 -10
msprobe/docs/09.accuracy_checker_MindSpore.md +19 -7
msprobe/docs/10.accuracy_compare_PyTorch.md +266 -102
msprobe/docs/11.accuracy_compare_MindSpore.md +117 -43
msprobe/docs/12.overflow_check_PyTorch.md +5 -3
msprobe/docs/13.overflow_check_MindSpore.md +6 -4
msprobe/docs/14.data_parse_PyTorch.md +4 -10
msprobe/docs/17.grad_probe.md +2 -1
msprobe/docs/18.online_dispatch.md +3 -3
msprobe/docs/19.monitor.md +211 -103
msprobe/docs/21.visualization_PyTorch.md +100 -28
msprobe/docs/22.visualization_MindSpore.md +103 -31
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/25.tool_function_introduction.md +23 -22
msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
msprobe/docs/27.dump_json_instruction.md +278 -8
msprobe/docs/28.debugger_save_instruction.md +111 -20
msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/31.config_check.md +95 -0
msprobe/docs/32.ckpt_compare.md +69 -0
msprobe/docs/33.generate_operator_MindSpore.md +190 -0
msprobe/docs/34.RL_collect.md +92 -0
msprobe/docs/35.nan_analyze.md +72 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +3 -3
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -55
msprobe/mindspore/api_accuracy_checker/api_runner.py +25 -11
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +580 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
msprobe/mindspore/api_accuracy_checker/data_manager.py +4 -3
msprobe/mindspore/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +451 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +11 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/cell_processor.py +204 -33
msprobe/mindspore/code_mapping/graph_parser.py +4 -21
msprobe/mindspore/common/const.py +73 -2
msprobe/mindspore/common/utils.py +157 -29
msprobe/mindspore/compare/common_dir_compare.py +382 -0
msprobe/mindspore/compare/distributed_compare.py +2 -26
msprobe/mindspore/compare/ms_compare.py +18 -398
msprobe/mindspore/compare/ms_graph_compare.py +20 -10
msprobe/mindspore/compare/utils.py +37 -0
msprobe/mindspore/debugger/debugger_config.py +59 -7
msprobe/mindspore/debugger/precision_debugger.py +83 -90
msprobe/mindspore/dump/cell_dump_process.py +902 -0
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +889 -0
msprobe/mindspore/dump/dump_tool_factory.py +18 -8
msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
msprobe/mindspore/dump/hook_cell/api_register.py +176 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +22 -12
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +42 -26
msprobe/mindspore/dump/jit_dump.py +35 -27
msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -16
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +22 -12
msprobe/mindspore/free_benchmark/common/utils.py +1 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +9 -2
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/mindspore_service.py +111 -0
msprobe/mindspore/monitor/common_func.py +52 -0
msprobe/mindspore/monitor/data_writers.py +237 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
msprobe/mindspore/monitor/features.py +13 -1
msprobe/mindspore/monitor/module_hook.py +568 -444
msprobe/mindspore/monitor/optimizer_collect.py +331 -0
msprobe/mindspore/monitor/utils.py +71 -9
msprobe/mindspore/ms_config.py +16 -15
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/task_handler_factory.py +5 -2
msprobe/msprobe.py +19 -0
msprobe/nan_analyze/__init__.py +14 -0
msprobe/nan_analyze/analyzer.py +255 -0
msprobe/nan_analyze/graph.py +189 -0
msprobe/nan_analyze/utils.py +211 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +15 -13
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +206 -4
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +9 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +6 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +31 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -20
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +154 -0
msprobe/pytorch/attl_manager.py +65 -0
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
msprobe/pytorch/common/utils.py +53 -19
msprobe/pytorch/compare/distributed_compare.py +4 -36
msprobe/pytorch/compare/pt_compare.py +13 -84
msprobe/pytorch/compare/utils.py +47 -0
msprobe/pytorch/debugger/debugger_config.py +34 -17
msprobe/pytorch/debugger/precision_debugger.py +50 -96
msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
msprobe/pytorch/dump/module_dump/module_dump.py +15 -61
msprobe/pytorch/dump/module_dump/module_processer.py +150 -114
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +1 -1
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
msprobe/pytorch/hook_module/api_register.py +155 -0
msprobe/pytorch/hook_module/hook_module.py +18 -22
msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +193 -75
msprobe/pytorch/hook_module/utils.py +28 -2
msprobe/pytorch/monitor/csv2tb.py +14 -4
msprobe/pytorch/monitor/data_writers.py +259 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +336 -241
msprobe/pytorch/monitor/module_metric.py +17 -0
msprobe/pytorch/monitor/optimizer_collect.py +244 -224
msprobe/pytorch/monitor/utils.py +84 -4
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +13 -2
msprobe/pytorch/online_dispatch/dump_compare.py +8 -2
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +5 -4
msprobe/pytorch/pt_config.py +16 -11
msprobe/pytorch/pytorch_service.py +70 -0
msprobe/visualization/builder/graph_builder.py +69 -10
msprobe/visualization/builder/msprobe_adapter.py +24 -12
msprobe/visualization/compare/graph_comparator.py +63 -51
msprobe/visualization/compare/mode_adapter.py +22 -20
msprobe/visualization/graph/base_node.py +11 -4
msprobe/visualization/graph/distributed_analyzer.py +1 -10
msprobe/visualization/graph/graph.py +2 -13
msprobe/visualization/graph/node_op.py +1 -2
msprobe/visualization/graph_service.py +251 -104
msprobe/visualization/utils.py +26 -44
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -140
msprobe/mindspore/monitor/anomaly_detect.py +0 -404
msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
msprobe/mindspore/service.py +0 -543
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
msprobe/pytorch/monitor/anomaly_detect.py +0 -410
msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
msprobe/pytorch/service.py +0 -470
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
/msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
/msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0

msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py CHANGED Viewed

@@ -40,7 +40,7 @@ from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import get_validat
 from msprobe.pytorch.api_accuracy_checker.common.utils import extract_detailed_api_segments, extract_basic_api_segments
 from msprobe.core.common.file_utils import FileChecker, change_mode, create_directory
 from msprobe.pytorch.common.log import logger
-from msprobe.core.common.utils import CompareException
+from msprobe.core.common.utils import CompareException, check_op_str_pattern_valid
 from msprobe.core.common.const import Const, CompareConst, FileCheckConst
 CompareConfig = namedtuple('CompareConfig', ['npu_csv_path', 'gpu_csv_path', 'result_csv_path', 'details_csv_path'])
@@ -151,6 +151,7 @@ def analyse_csv(npu_data, gpu_data, config):
         message = ''
         compare_column = ApiPrecisionOutputColumn()
         full_api_name_with_direction_status = row_npu[ApiPrecisionCompareColumn.API_NAME]
+        check_op_str_pattern_valid(full_api_name_with_direction_status)
         row_gpu = gpu_data[gpu_data[ApiPrecisionCompareColumn.API_NAME] == full_api_name_with_direction_status]
         api_name, api_full_name, direction_status = extract_detailed_api_segments(full_api_name_with_direction_status)
         if not api_full_name:
@@ -430,6 +431,7 @@ def _api_precision_compare(parser=None):
     _api_precision_compare_parser(parser)
     args = parser.parse_args(sys.argv[1:])
     _api_precision_compare_command(args)
+    logger.info("Compare task completed.")
 def _api_precision_compare_command(args):
@@ -457,8 +459,3 @@ def _api_precision_compare_parser(parser):
     parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str,
                         help="<optional> The api precision compare task result out path.",
                         required=False)
-if __name__ == '__main__':
-    _api_precision_compare()
-    logger.info("Compare task completed.")

msprobe/pytorch/api_accuracy_checker/compare/compare.py CHANGED Viewed

@@ -40,6 +40,7 @@ from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import check_dty
     DETAIL_TEST_ROWS, BENCHMARK_COMPARE_SUPPORT_LIST
 from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_segments
 from msprobe.pytorch.common.log import logger
+from msprobe.core.common.decorator import recursion_depth_decorator
 ResultInfo = namedtuple('ResultInfo', ['full_api_name', 'fwd_success_status', 'bwd_success_status',
@@ -178,6 +179,41 @@ class Comparator:
             if not os.path.exists(detail_save_path):
                 write_csv(DETAIL_TEST_ROWS, detail_save_path)
+    @recursion_depth_decorator("compare_core")
+    def _compare_core(self, api_name, bench_output, device_output):
+        compare_column = CompareColumn()
+        if not isinstance(bench_output, type(device_output)):
+            status = CompareConst.ERROR
+            message = "bench and npu output type is different."
+        elif isinstance(bench_output, dict):
+            b_keys, n_keys = set(bench_output.keys()), set(device_output.keys())
+            if b_keys != n_keys:
+                status = CompareConst.ERROR
+                message = "bench and npu output dict keys are different."
+            else:
+                status, compare_column, message = self._compare_core(api_name, list(bench_output.values()),
+                                                                     list(device_output.values()))
+        elif isinstance(bench_output, torch.Tensor):
+            copy_bench_out = bench_output.detach().clone()
+            copy_device_output = device_output.detach().clone()
+            compare_column.bench_type = str(copy_bench_out.dtype)
+            compare_column.npu_type = str(copy_device_output.dtype)
+            compare_column.shape = tuple(device_output.shape)
+            status, compare_column, message = self._compare_torch_tensor(api_name, copy_bench_out, copy_device_output,
+                                                                         compare_column)
+        elif isinstance(bench_output, (bool, int, float, str)):
+            compare_column.bench_type = str(type(bench_output))
+            compare_column.npu_type = str(type(device_output))
+            status, compare_column, message = self._compare_builtin_type(bench_output, device_output, compare_column)
+        elif bench_output is None:
+            status = CompareConst.SKIP
+            message = "Bench output is None, skip this test."
+        else:
+            status = CompareConst.ERROR
+            message = "Unexpected output type in compare_core: {}".format(type(bench_output))
+        return status, compare_column, message
     def write_summary_csv(self, test_result):
         test_rows = []
         try:
@@ -293,40 +329,6 @@ class Comparator:
                     test_final_success = CompareConst.WARNING
         return test_final_success, detailed_result_total
-    def _compare_core(self, api_name, bench_output, device_output):
-        compare_column = CompareColumn()
-        if not isinstance(bench_output, type(device_output)):
-            status = CompareConst.ERROR
-            message = "bench and npu output type is different."
-        elif isinstance(bench_output, dict):
-            b_keys, n_keys = set(bench_output.keys()), set(device_output.keys())
-            if b_keys != n_keys:
-                status = CompareConst.ERROR
-                message = "bench and npu output dict keys are different."
-            else:
-                status, compare_column, message = self._compare_core(api_name, list(bench_output.values()),
-                                                                     list(device_output.values()))
-        elif isinstance(bench_output, torch.Tensor):
-            copy_bench_out = bench_output.detach().clone()
-            copy_device_output = device_output.detach().clone()
-            compare_column.bench_type = str(copy_bench_out.dtype)
-            compare_column.npu_type = str(copy_device_output.dtype)
-            compare_column.shape = tuple(device_output.shape)
-            status, compare_column, message = self._compare_torch_tensor(api_name, copy_bench_out, copy_device_output,
-                                                                         compare_column)
-        elif isinstance(bench_output, (bool, int, float, str)):
-            compare_column.bench_type = str(type(bench_output))
-            compare_column.npu_type = str(type(device_output))
-            status, compare_column, message = self._compare_builtin_type(bench_output, device_output, compare_column)
-        elif bench_output is None:
-            status = CompareConst.SKIP
-            message = "Bench output is None, skip this test."
-        else:
-            status = CompareConst.ERROR
-            message = "Unexpected output type in compare_core: {}".format(type(bench_output))
-        return status, compare_column, message
     def _compare_torch_tensor(self, api_name, bench_output, device_output, compare_column):
         cpu_shape = bench_output.shape
         npu_shape = device_output.shape

msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py CHANGED Viewed

@@ -28,10 +28,10 @@ from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import binary_st
 ulp_standard_api, thousandth_standard_api
 from msprobe.core.common.file_utils import FileOpen, load_json, save_json
 from msprobe.core.common.utils import check_file_or_directory_path, check_op_str_pattern_valid, is_int
-from msprobe.core.common.const import Const, MonitorConst, MsgConst
+from msprobe.core.common.const import Const, MonitorConst, MsgConst, FileCheckConst
 from msprobe.core.common.log import logger
-from msprobe.core.common.file_utils import make_dir
-from msprobe.core.common.utils import recursion_depth_decorator
+from msprobe.core.common.file_utils import make_dir, change_mode
+from msprobe.core.common.decorator import recursion_depth_decorator
 TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"]
 TORCH_BOOL_TYPE = ["torch.bool"]
@@ -50,6 +50,7 @@ DATA_NAME = "data_name"
 API_MAX_LENGTH = 30
 PROPAGATION_LIST = [Const.FORWARD, Const.BACKWARD]
 DATAMODE_LIST = ["random_data", "real_data"]
+ITER_MAX_TIMES = 1000
 class APIInfo:
@@ -97,6 +98,8 @@ class CommonConfig:
         iter_t = self.iter_times
         if iter_t <= 0:
             raise ValueError("iter_times should be an integer bigger than zero!")
+        if iter_t > ITER_MAX_TIMES:
+            raise ValueError("iter_times should not be greater than 1000!")
         json_file = self.extract_api_path
         propagation = self.propagation
@@ -117,7 +120,7 @@ class CommonConfig:
         # Retrieve the first API name and dictionary
         forward_item = next(iter(json_content.items()), None)
-        if not forward_item or not isinstance(forward_item[1], dict):
+        if not forward_item or not isinstance(forward_item[1], dict) or not forward_item[1]:
             raise ValueError(f'Invalid forward API data in json_content!')
         # if propagation is backward, ensure json file contains forward and backward info
@@ -127,7 +130,7 @@ class CommonConfig:
         # if propagation is backward, ensure it has valid data
         if propagation == Const.BACKWARD:
             backward_item = list(json_content.items())[1]
-            if not isinstance(backward_item[1], dict):
+            if not isinstance(backward_item[1], dict) or not backward_item[1]:
                 raise ValueError(f'Invalid backward API data in json_content!')
         return json_content
@@ -169,7 +172,7 @@ class APIExtractor:
                     value = self.load_real_data_path(value, real_data_path)
                 new_data[key] = value
         if not new_data:
-            logger.error(f"Error: The api '{self.api_name}' does not exist in the file.")
+            logger.warning(f"Warning: The api '{self.api_name}' does not exist in the file.")
         else:
             save_json(self.output_file, new_data, indent=4)
             logger.info(
@@ -183,6 +186,7 @@ class APIExtractor:
                     self.update_data_name(v, dump_data_dir)
         return value
+    @recursion_depth_decorator("OpGenerator: APIExtractor.update_data_name")
     def update_data_name(self, data, dump_data_dir):
         if isinstance(data, list):
             for item in data:
@@ -407,19 +411,16 @@ class OperatorScriptGenerator:
         return kwargs_dict_generator
 def _op_generator_parser(parser):
-    parser.add_argument("-i", "--config_input", dest="config_input", default='', type=str,
-                        help="<Optional> Path of config json file", required=True)
+    parser.add_argument("-i", "--config_input", dest="config_input", type=str,
+                        help="<Required> Path of config json file", required=True)
     parser.add_argument("-o", "--api_output_path", dest="api_output_path", type=str,
-                        help="<Required> Path of extract api_name.json.",
-                        required=True)
+                        help="<Required> Path of extract api_name.json.", required=True)
 def parse_json_config(json_file_path):
     if not json_file_path:
-        config_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-        json_file_path = os.path.join(config_dir, "config.json")
+        raise Exception("config_input path can not be empty, please check.")
     json_config = load_json(json_file_path)
     common_config = CommonConfig(json_config)
     return common_config
@@ -467,6 +468,7 @@ def _run_operator_generate_commond(cmd_args):
             fout.write(code_template.format(**internal_settings))
     except OSError:
         logger.error(f"Failed to open file. Please check file {template_path} or {operator_script_path}.")
+    change_mode(operator_script_path, FileCheckConst.DATA_FILE_AUTHORITY)
     logger.info(f"Generate operator script successfully and the name is {operator_script_path}.")

msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template CHANGED Viewed

@@ -1,6 +1,6 @@
-import json
 import os
-import math
+import re
+import stat
 from enum import Enum, auto
 import torch
 try:
@@ -25,6 +25,31 @@ RAISE_PRECISION = {{
 }}
 THOUSANDTH_THRESHOLDING = 0.001
 BACKWARD = 'backward'
+DIR = "dir"
+FILE = "file"
+READ_ABLE = "read"
+WRITE_ABLE = "write"
+READ_WRITE_ABLE = "read and write"
+DIRECTORY_LENGTH = 4096
+FILE_NAME_LENGTH = 255
+SOFT_LINK_ERROR = "检测到软链接"
+FILE_PERMISSION_ERROR = "文件权限错误"
+INVALID_FILE_ERROR = "无效文件"
+ILLEGAL_PATH_ERROR = "非法文件路径"
+ILLEGAL_PARAM_ERROR = "非法打开方式"
+FILE_TOO_LARGE_ERROR = "文件过大"
+FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$"
+FILE_SIZE_DICT = {{
+    ".pkl": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".npy": 10737418240,  # 10 * 1024 * 1024 * 1024
+    ".json": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".pt": 10737418240,  # 10 * 1024 * 1024 * 1024
+    ".csv": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".xlsx": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".yaml": 1073741824,  # 1 * 1024 * 1024 * 1024
+    ".ir": 1073741824  # 1 * 1024 * 1024 * 1024
+}}
+COMMOM_FILE_SIZE = 1048576  # 1 * 1024 * 1024
 class CompareStandard(Enum):
     BINARY_EQUALITY_STANDARD = auto()
@@ -33,13 +58,189 @@ class CompareStandard(Enum):
     BENCHMARK_STANDARD = auto()
     THOUSANDTH_STANDARD = auto()
+class FileChecker:
+    """
+    The class for check file.
+    Attributes:
+        file_path: The file or dictionary path to be verified.
+        path_type: file or dictionary
+        ability(str): FileCheckConst.WRITE_ABLE or FileCheckConst.READ_ABLE to set file has writability or readability
+        file_type(str): The correct file type for file
+    """
+    def __init__(self, file_path, path_type, ability=None, file_type=None, is_script=True):
+        self.file_path = file_path
+        self.path_type = self._check_path_type(path_type)
+        self.ability = ability
+        self.file_type = file_type
+        self.is_script = is_script
+    @staticmethod
+    def _check_path_type(path_type):
+        if path_type not in [DIR, FILE]:
+            print(f'ERROR: The path_type must be {{DIR}} or {{FILE}}.')
+            raise Exception(ILLEGAL_PARAM_ERROR)
+        return path_type
+    def common_check(self):
+        """
+        功能：用户校验基本文件权限：软连接、文件长度、是否存在、读写权限、文件属组、文件特殊字符
+        注意：文件后缀的合法性，非通用操作，可使用其他独立接口实现
+        """
+        FileChecker.check_path_exists(self.file_path)
+        FileChecker.check_link(self.file_path)
+        self.file_path = os.path.realpath(self.file_path)
+        FileChecker.check_path_length(self.file_path)
+        FileChecker.check_path_type(self.file_path, self.path_type)
+        self.check_path_ability()
+        if self.is_script:
+            FileChecker.check_path_owner_consistent(self.file_path)
+        FileChecker.check_path_pattern_valid(self.file_path)
+        FileChecker.check_common_file_size(self.file_path)
+        FileChecker.check_file_suffix(self.file_path, self.file_type)
+        if self.path_type == FILE:
+            FileChecker.check_dirpath_before_read(self.file_path)
+        return self.file_path
+    def check_path_ability(self):
+        if self.ability == WRITE_ABLE:
+            FileChecker.check_path_writability(self.file_path)
+        if self.ability == READ_ABLE:
+            FileChecker.check_path_readability(self.file_path)
+        if self.ability == READ_WRITE_ABLE:
+            FileChecker.check_path_readability(self.file_path)
+            FileChecker.check_path_writability(self.file_path)
+    @staticmethod
+    def check_path_exists(path):
+        if not os.path.exists(path):
+            print(f'ERROR: The file path %s does not exist.' % path)
+            raise Exception()
+    @staticmethod
+    def check_link(path):
+        abs_path = os.path.abspath(path)
+        if os.path.islink(abs_path):
+            print('ERROR: The file path {{}} is a soft link.'.format(path))
+            raise Exception(SOFT_LINK_ERROR)
+    @staticmethod
+    def check_path_length(path, name_length=None):
+        file_max_name_length = name_length if name_length else FILE_NAME_LENGTH
+        if len(path) > DIRECTORY_LENGTH or \
+                len(os.path.basename(path)) > file_max_name_length:
+            print(f'ERROR: The file path length exceeds limit.')
+            raise Exception(ILLEGAL_PATH_ERROR)
+    @staticmethod
+    def check_path_type(file_path, file_type):
+        if file_type == FILE:
+            if not os.path.isfile(file_path):
+                print(f"ERROR: The {{file_path}} should be a file!")
+                raise Exception(INVALID_FILE_ERROR)
+        if file_type == DIR:
+            if not os.path.isdir(file_path):
+                print(f"ERROR: The {{file_path}} should be a dictionary!")
+                raise Exception(INVALID_FILE_ERROR)
+    @staticmethod
+    def check_path_owner_consistent(path):
+        file_owner = os.stat(path).st_uid
+        if file_owner != os.getuid() and os.getuid() != 0:
+            print('ERROR: The file path %s may be insecure because is does not belong to you.' % path)
+            raise Exception(FILE_PERMISSION_ERROR)
+    @staticmethod
+    def check_path_pattern_valid(path):
+        if not re.match(FILE_VALID_PATTERN, path):
+            print('ERROR: The file path %s contains special characters.' % (path))
+            raise Exception(ILLEGAL_PATH_ERROR)
+    @staticmethod
+    def check_common_file_size(file_path):
+        if os.path.isfile(file_path):
+            for suffix, max_size in FILE_SIZE_DICT.items():
+                if file_path.endswith(suffix):
+                    FileChecker.check_file_size(file_path, max_size)
+                    return
+            FileChecker.check_file_size(file_path, COMMOM_FILE_SIZE)
+    @staticmethod
+    def check_file_size(file_path, max_size):
+        try:
+            file_size = os.path.getsize(file_path)
+        except OSError as os_error:
+            print(f'ERROR: Failed to open "{{file_path}}". {{str(os_error)}}')
+            raise Exception(INVALID_FILE_ERROR) from os_error
+        if file_size >= max_size:
+            print(f'ERROR: The size ({{file_size}}) of {{file_path}} exceeds ({{max_size}}) bytes, tools not support.')
+            raise Exception(FILE_TOO_LARGE_ERROR)
+    @staticmethod
+    def check_file_suffix(file_path, file_suffix):
+        if file_suffix:
+            if not file_path.endswith(file_suffix):
+                print(f"The {{file_path}} should be a {{file_suffix}} file!")
+                raise Exception(INVALID_FILE_ERROR)
+    @staticmethod
+    def check_dirpath_before_read(path):
+        path = os.path.realpath(path)
+        dirpath = os.path.dirname(path)
+        if FileChecker.check_others_writable(dirpath):
+            print(f"WARNING: The directory is writable by others: {{dirpath}}.")
+        try:
+            FileChecker.check_path_owner_consistent(dirpath)
+        except Exception:
+            print(f"WARNING: The directory {{dirpath}} is not yours.")
+    @staticmethod
+    def check_others_writable(directory):
+        dir_stat = os.stat(directory)
+        is_writable = (
+                bool(dir_stat.st_mode & stat.S_IWGRP) or  # 组可写
+                bool(dir_stat.st_mode & stat.S_IWOTH)  # 其他用户可写
+        )
+        return is_writable
+    @staticmethod
+    def check_path_readability(path):
+        if not os.access(path, os.R_OK):
+            print('ERROR: The file path %s is not readable.' % path)
+            raise Exception(FILE_PERMISSION_ERROR)
+    @staticmethod
+    def check_path_writability(path):
+        if not os.access(path, os.W_OK):
+            print('ERROR: The file path %s is not writable.' % path)
+            raise Exception(FILE_PERMISSION_ERROR)
+def check_file_or_directory_path(path, isdir=False):
+    """
+    Function Description:
+        check whether the path is valid
+    Parameter:
+        path: the path to check
+        isdir: the path is dir or file
+    Exception Description:
+        when invalid data throw exception
+    """
+    if isdir:
+        path_checker = FileChecker(path, DIR, WRITE_ABLE)
+    else:
+        path_checker = FileChecker(path, FILE, READ_ABLE)
+    path_checker.common_check()
 def load_pt(pt_path, to_cpu=False):
     pt_path = os.path.realpath(pt_path)
+    check_file_or_directory_path(pt_path)
     try:
         if to_cpu:
-            pt = torch.load(pt_path, map_location=torch.device("cpu"))
+            pt = torch.load(pt_path, map_location=torch.device("cpu"), weights_only=True)
         else:
-            pt = torch.load(pt_path)
+            pt = torch.load(pt_path, weights_only=True)
     except Exception as e:
         raise RuntimeError(f"load pt file {{pt_path}} failed") from e
     return pt
@@ -202,6 +403,7 @@ def compare_tensor(out_device, out_bench, api_name):
         else:
             abs_err = torch.abs(out_device - out_bench)
             abs_bench = torch.abs(out_bench)
+            eps = 2 ** -23
             if dtype_bench == torch.float32:
                 eps = 2 ** -23
             if dtype_bench == torch.float64:

msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py CHANGED Viewed

@@ -50,6 +50,9 @@ def split_json_file(input_file, num_splits, filter_api):
         backward_data[f"{data_name}.backward"] = backward_data.pop(data_name)
     input_data = load_json(input_file)
+    if "dump_data_dir" not in input_data.keys():
+        logger.error("Invalid input file, 'dump_data_dir' field is missing")
+        raise CompareException("Invalid input file, 'dump_data_dir' field is missing")
     if input_data.get("data") is None:
         logger.error("Invalid input file, 'data' field is missing")
         raise CompareException("Invalid input file, 'data' field is missing")
@@ -84,10 +87,6 @@ def signal_handler(signum, frame):
     raise KeyboardInterrupt()
-signal.signal(signal.SIGINT, signal_handler)
-signal.signal(signal.SIGTERM, signal_handler)
 ParallelUTConfig = namedtuple('ParallelUTConfig', ['api_files', 'out_path', 'num_splits',
                                                    'save_error_data_flag', 'jit_compile_flag', 'device_id',
                                                    'result_csv_path', 'total_items', 'config_path'])
@@ -97,7 +96,7 @@ def run_parallel_ut(config):
     processes = []
     device_id_cycle = cycle(config.device_id)
     if config.save_error_data_flag:
-        logger.info("UT task error datas will be saved")
+        logger.info("UT task error data will be saved")
     logger.info(f"Starting parallel UT with {config.num_splits} processes")
     progress_bar = tqdm(total=config.total_items, desc="Total items", unit="items")
@@ -129,6 +128,9 @@ def run_parallel_ut(config):
                     sys.stdout.flush()
         except ValueError as e:
             logger.warning(f"An error occurred while reading subprocess output: {e}")
+        finally:
+            if process.poll() is None:
+                process.stdout.close()
     def update_progress_bar(progress_bar, result_csv_path):
         while any(process.poll() is None for process in processes):
@@ -214,6 +216,8 @@ def prepare_config(args):
 def main():
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
     parser = argparse.ArgumentParser(description='Run UT in parallel')
     _run_ut_parser(parser)
     parser.add_argument('-n', '--num_splits', type=int, choices=range(1, 65), default=8,
@@ -221,7 +225,3 @@ def main():
     args = parser.parse_args()
     config = prepare_config(args)
     run_parallel_ut(config)
-if __name__ == '__main__':
-    main()

msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py CHANGED Viewed

@@ -34,8 +34,10 @@ from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import exec_api, i
 from msprobe.core.common.file_utils import check_link, FileChecker
 from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_segments
 from msprobe.core.common.const import FileCheckConst, Const
+from msprobe.core.common.utils import check_op_str_pattern_valid
 from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward
+from msprobe.core.common.decorator import recursion_depth_decorator
 def check_tensor_overflow(x):
@@ -63,6 +65,7 @@ def check_tensor_overflow(x):
         return False
+@recursion_depth_decorator("check_data_overflow")
 def check_data_overflow(x, device):
     if isinstance(x, (tuple, list)):
         if not x:
@@ -75,6 +78,7 @@ def check_data_overflow(x, device):
             return torch_npu.npu.utils.npu_check_overflow(x)
+@recursion_depth_decorator("is_bool_output")
 def is_bool_output(x):
     if isinstance(x, (tuple, list)):
         if not x:
@@ -91,6 +95,7 @@ def run_overflow_check(forward_file):
         dump_path = os.path.dirname(forward_file)
         real_data_path = os.path.join(dump_path, Const.DUMP_TENSOR_DATA)
     for api_full_name, api_info_dict in tqdm(forward_content.items()):
+        check_op_str_pattern_valid(api_full_name)
         if is_unsupported_api(api_full_name, is_overflow_check=True):
             continue
         try:
@@ -161,6 +166,7 @@ def _run_overflow_check(parser=None):
     _run_overflow_check_parser(parser)
     args = parser.parse_args(sys.argv[1:])
     _run_overflow_check_command(args)
+    logger.info("UT task completed.")
 def _run_overflow_check_command(args):
@@ -175,8 +181,3 @@ def _run_overflow_check_command(args):
         logger.error(f"Set NPU device id failed. device id is: {args.device_id}")
         raise NotImplementedError from error
     run_overflow_check(api_info)
-if __name__ == '__main__':
-    _run_overflow_check()
-    logger.info("UT task completed.")

mindstudio-probe 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl

mindstudio-probe 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl