mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
- mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
- msprobe/README.md +84 -18
- msprobe/__init__.py +16 -1
- msprobe/config.json +1 -5
- msprobe/core/advisor/advisor.py +16 -11
- msprobe/core/advisor/advisor_const.py +6 -7
- msprobe/core/advisor/advisor_result.py +12 -12
- msprobe/core/common/const.py +164 -3
- msprobe/core/common/exceptions.py +26 -4
- msprobe/core/common/file_utils.py +196 -27
- msprobe/core/common/inplace_op_checker.py +53 -0
- msprobe/core/common/inplace_ops.yaml +251 -0
- msprobe/core/common/log.py +46 -18
- msprobe/core/common/utils.py +308 -209
- msprobe/core/common_config.py +60 -38
- msprobe/core/compare/acc_compare.py +332 -94
- msprobe/core/compare/check.py +104 -22
- msprobe/core/compare/compare_cli.py +42 -5
- msprobe/core/compare/highlight.py +162 -57
- msprobe/core/compare/layer_mapping/__init__.py +19 -0
- msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
- msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
- msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
- msprobe/core/compare/multiprocessing_compute.py +33 -8
- msprobe/core/compare/npy_compare.py +73 -29
- msprobe/core/compare/utils.py +306 -247
- msprobe/core/data_dump/data_collector.py +44 -43
- msprobe/core/data_dump/data_processor/base.py +88 -35
- msprobe/core/data_dump/data_processor/factory.py +20 -3
- msprobe/core/data_dump/data_processor/mindspore_processor.py +14 -8
- msprobe/core/data_dump/data_processor/pytorch_processor.py +180 -66
- msprobe/core/data_dump/json_writer.py +63 -42
- msprobe/core/data_dump/scope.py +143 -48
- msprobe/core/grad_probe/constant.py +31 -13
- msprobe/core/grad_probe/grad_compare.py +20 -4
- msprobe/core/grad_probe/utils.py +44 -3
- msprobe/core/overflow_check/abnormal_scene.py +185 -0
- msprobe/core/overflow_check/api_info.py +55 -0
- msprobe/core/overflow_check/checker.py +138 -0
- msprobe/core/overflow_check/filter.py +157 -0
- msprobe/core/overflow_check/ignore_rules.yaml +55 -0
- msprobe/core/overflow_check/level.py +22 -0
- msprobe/core/overflow_check/utils.py +28 -0
- msprobe/docs/01.installation.md +29 -9
- msprobe/docs/02.config_introduction.md +83 -84
- msprobe/docs/03.config_examples.md +3 -20
- msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
- msprobe/docs/05.data_dump_PyTorch.md +143 -13
- msprobe/docs/06.data_dump_MindSpore.md +197 -88
- msprobe/docs/07.accuracy_checker_PyTorch.md +69 -46
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +52 -17
- msprobe/docs/09.accuracy_checker_MindSpore.md +51 -15
- msprobe/docs/10.accuracy_compare_PyTorch.md +187 -99
- msprobe/docs/11.accuracy_compare_MindSpore.md +253 -31
- msprobe/docs/12.overflow_check_PyTorch.md +1 -1
- msprobe/docs/13.overflow_check_MindSpore.md +6 -6
- msprobe/docs/15.free_benchmarking_PyTorch.md +60 -55
- msprobe/docs/16.free_benchmarking_MindSpore.md +159 -0
- msprobe/docs/17.grad_probe.md +19 -22
- msprobe/docs/18.online_dispatch.md +89 -0
- msprobe/docs/19.monitor.md +468 -0
- msprobe/docs/20.monitor_performance_baseline.md +52 -0
- msprobe/docs/21.visualization_PyTorch.md +386 -0
- msprobe/docs/22.visualization_MindSpore.md +384 -0
- msprobe/docs/23.tool_function_introduction.md +28 -0
- msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +25 -10
- msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/monitor/cpu_info.png +0 -0
- msprobe/docs/img/ms_dump.png +0 -0
- msprobe/docs/img/ms_layer.png +0 -0
- msprobe/docs/img/pt_dump.png +0 -0
- msprobe/mindspore/__init__.py +16 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +130 -138
- msprobe/mindspore/api_accuracy_checker/api_info.py +27 -5
- msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
- msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
- msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
- msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
- msprobe/mindspore/api_accuracy_checker/main.py +27 -3
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
- msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
- msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
- msprobe/mindspore/cell_processor.py +58 -13
- msprobe/mindspore/common/const.py +35 -13
- msprobe/mindspore/common/log.py +5 -9
- msprobe/mindspore/common/utils.py +60 -5
- msprobe/mindspore/compare/distributed_compare.py +15 -28
- msprobe/mindspore/compare/ms_compare.py +319 -158
- msprobe/mindspore/compare/ms_graph_compare.py +99 -49
- msprobe/mindspore/debugger/debugger_config.py +20 -14
- msprobe/mindspore/debugger/precision_debugger.py +43 -13
- msprobe/mindspore/dump/dump_tool_factory.py +18 -1
- msprobe/mindspore/dump/hook_cell/api_registry.py +23 -3
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +203 -0
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +107 -10
- msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
- msprobe/mindspore/dump/jit_dump.py +56 -20
- msprobe/mindspore/dump/kernel_graph_dump.py +19 -5
- msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -6
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +162 -41
- msprobe/mindspore/free_benchmark/common/config.py +15 -0
- msprobe/mindspore/free_benchmark/common/handler_params.py +15 -1
- msprobe/mindspore/free_benchmark/common/utils.py +37 -8
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
- msprobe/mindspore/free_benchmark/handler/base_handler.py +20 -5
- msprobe/mindspore/free_benchmark/handler/check_handler.py +21 -7
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +18 -3
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -6
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +23 -8
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +29 -5
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +25 -10
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +45 -19
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +29 -8
- msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +22 -7
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +17 -2
- msprobe/mindspore/grad_probe/global_context.py +44 -14
- msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
- msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
- msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
- msprobe/mindspore/grad_probe/hook.py +24 -10
- msprobe/mindspore/grad_probe/utils.py +18 -5
- msprobe/mindspore/ms_config.py +22 -15
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -6
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
- msprobe/mindspore/runtime.py +15 -0
- msprobe/mindspore/service.py +75 -150
- msprobe/mindspore/task_handler_factory.py +15 -0
- msprobe/msprobe.py +24 -7
- msprobe/pytorch/__init__.py +23 -3
- msprobe/pytorch/api_accuracy_checker/common/config.py +81 -2
- msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +50 -25
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
- msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
- msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +73 -33
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +44 -18
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +32 -11
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +122 -172
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +158 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +30 -24
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +68 -31
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +115 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +26 -9
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
- msprobe/pytorch/bench_functions/__init__.py +18 -3
- msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
- msprobe/pytorch/bench_functions/confusion_transpose.py +20 -1
- msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
- msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
- msprobe/pytorch/bench_functions/linear.py +15 -0
- msprobe/pytorch/bench_functions/matmul_backward.py +33 -6
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +280 -157
- msprobe/pytorch/bench_functions/rms_norm.py +15 -0
- msprobe/pytorch/bench_functions/rotary_mul.py +32 -9
- msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
- msprobe/pytorch/bench_functions/swiglu.py +29 -6
- msprobe/pytorch/common/__init__.py +15 -0
- msprobe/pytorch/common/log.py +18 -6
- msprobe/pytorch/common/parse_json.py +31 -16
- msprobe/pytorch/common/utils.py +96 -40
- msprobe/pytorch/compare/distributed_compare.py +13 -14
- msprobe/pytorch/compare/match.py +15 -0
- msprobe/pytorch/compare/pt_compare.py +44 -10
- msprobe/pytorch/debugger/debugger_config.py +69 -52
- msprobe/pytorch/debugger/precision_debugger.py +72 -24
- msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
- msprobe/pytorch/free_benchmark/__init__.py +20 -5
- msprobe/pytorch/free_benchmark/common/constant.py +15 -0
- msprobe/pytorch/free_benchmark/common/counter.py +15 -0
- msprobe/pytorch/free_benchmark/common/enums.py +43 -0
- msprobe/pytorch/free_benchmark/common/params.py +23 -1
- msprobe/pytorch/free_benchmark/common/utils.py +43 -5
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +47 -9
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +17 -0
- msprobe/pytorch/free_benchmark/main.py +19 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +18 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +21 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +28 -2
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +19 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +65 -16
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +21 -5
- msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
- msprobe/pytorch/function_factory.py +17 -2
- msprobe/pytorch/functional/module_dump.py +84 -0
- msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
- msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
- msprobe/pytorch/hook_module/__init__.py +16 -1
- msprobe/pytorch/hook_module/api_registry.py +13 -8
- msprobe/pytorch/hook_module/hook_module.py +17 -19
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
- msprobe/pytorch/hook_module/utils.py +4 -6
- msprobe/pytorch/hook_module/wrap_aten.py +12 -11
- msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
- msprobe/pytorch/hook_module/wrap_functional.py +21 -20
- msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
- msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
- msprobe/pytorch/hook_module/wrap_torch.py +4 -6
- msprobe/pytorch/hook_module/wrap_vf.py +4 -6
- msprobe/pytorch/module_processer.py +18 -6
- msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
- msprobe/pytorch/monitor/anomaly_detect.py +340 -0
- msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
- msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
- msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
- msprobe/pytorch/monitor/features.py +108 -0
- msprobe/pytorch/monitor/module_hook.py +870 -0
- msprobe/pytorch/monitor/module_metric.py +193 -0
- msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
- msprobe/pytorch/monitor/optimizer_collect.py +295 -0
- msprobe/pytorch/monitor/unittest/__init__.py +0 -0
- msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
- msprobe/pytorch/monitor/utils.py +250 -0
- msprobe/pytorch/monitor/visualizer.py +59 -0
- msprobe/pytorch/online_dispatch/__init__.py +2 -3
- msprobe/pytorch/online_dispatch/compare.py +38 -48
- msprobe/pytorch/online_dispatch/dispatch.py +50 -25
- msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
- msprobe/pytorch/online_dispatch/single_compare.py +60 -39
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +9 -1
- msprobe/pytorch/online_dispatch/utils.py +48 -23
- msprobe/pytorch/parse.py +15 -0
- msprobe/pytorch/parse_tool/cli.py +5 -6
- msprobe/pytorch/parse_tool/lib/compare.py +19 -26
- msprobe/pytorch/parse_tool/lib/config.py +1 -1
- msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -2
- msprobe/pytorch/parse_tool/lib/utils.py +40 -55
- msprobe/pytorch/parse_tool/lib/visualization.py +3 -1
- msprobe/pytorch/pt_config.py +192 -40
- msprobe/pytorch/service.py +110 -35
- msprobe/visualization/__init__.py +14 -0
- msprobe/visualization/builder/__init__.py +14 -0
- msprobe/visualization/builder/graph_builder.py +165 -0
- msprobe/visualization/builder/msprobe_adapter.py +205 -0
- msprobe/visualization/compare/__init__.py +14 -0
- msprobe/visualization/compare/graph_comparator.py +130 -0
- msprobe/visualization/compare/mode_adapter.py +211 -0
- msprobe/visualization/graph/__init__.py +14 -0
- msprobe/visualization/graph/base_node.py +124 -0
- msprobe/visualization/graph/graph.py +200 -0
- msprobe/visualization/graph/node_colors.py +95 -0
- msprobe/visualization/graph/node_op.py +39 -0
- msprobe/visualization/graph_service.py +214 -0
- msprobe/visualization/utils.py +232 -0
- mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
- msprobe/docs/04.acl_config_examples.md +0 -76
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -43
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -107
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
- msprobe/pytorch/functional/dump_module.py +0 -39
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
- /msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
- /msprobe/pytorch/{functional/data_processor.py → monitor/distributed/__init__.py} +0 -0
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py (file identified from the hunk content and the +158 -4 entry in the summary above)

@@ -1,8 +1,35 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import re
+import torch
+
+try:
+    import torch_npu
+except ImportError:
+    current_device = "cuda"
+else:
+    current_device = "npu"
 
-from msprobe.core.common.const import FileCheckConst
+from msprobe.core.common.const import FileCheckConst, Const, CompareConst
 from msprobe.core.common.file_utils import FileChecker
+from msprobe.core.common.log import logger
+from msprobe.core.common.utils import CompareException
 from msprobe.pytorch.hook_module.wrap_aten import AtenOPTemplate
 from msprobe.pytorch.hook_module.wrap_functional import FunctionalOPTemplate
 from msprobe.pytorch.hook_module.wrap_npu_custom import NpuOPTemplate
@@ -10,12 +37,21 @@ from msprobe.pytorch.hook_module.wrap_tensor import TensorOPTemplate
 from msprobe.pytorch.hook_module.wrap_torch import TorchOPTemplate
 
 hf_32_standard_api = ["conv1d", "conv2d"]
+not_detach_set = {'resize_', 'resize_as_', 'set_', 'transpose_', 't_', 'squeeze_', 'unsqueeze_'}
+not_raise_dtype_set = {'type_as'}
+
+PRECISION_MAPPING = {
+    torch.float16: torch.float32,
+    torch.bfloat16: torch.float32,
+    torch.float32: torch.float64
+}
 
 
-class
+class BackwardMessage:
     MULTIPLE_BACKWARD_MESSAGE = "Multiple backward is not supported."
-    UNSUPPORT_BACKWARD_MESSAGE = "function with out=... arguments don't support automatic differentiation,
-
+    UNSUPPORT_BACKWARD_MESSAGE = "function with out=... arguments don't support automatic differentiation, " \
+                                 "skip backward."
+    NO_BACKWARD_RESULT_MESSAGE = "This API does not have backward input data, skip backward."
 
 
 class UtDataInfo:
@@ -68,3 +104,121 @@ def exec_api(api_type, api_name, device, args, kwargs):
         torch_api = NpuOPTemplate(api_name, None, False, device)
         out = torch_api.forward(*args, **kwargs)
     return out
+
+
+def deal_detach(arg, to_detach=True):
+    return arg.detach() if to_detach else arg
+
+
+def raise_bench_data_dtype(api_name, arg, raise_dtype=None):
+    '''
+    Convert the dtype of the benchmark data to raise_dtype.
+    Inputs:
+    api_name: name of the API
+    arg: benchmark input
+    raise_dtype: target dtype to convert to
+    Outputs:
+    arg: benchmark input converted to the target dtype
+    '''
+    if api_name in hf_32_standard_api and arg.dtype == torch.float32:
+        return arg
+    if raise_dtype is None or arg.dtype not in PRECISION_MAPPING or raise_dtype == arg.dtype:
+        return arg
+    return arg.type(raise_dtype)
+
+
+def generate_device_params(input_args, input_kwargs, need_backward, api_name):
+    def recursive_arg_to_device(arg_in, to_detach, depth=0):
+        if depth > Const.MAX_DEPTH:
+            logger.error("The depth of arg_in is too large, please check the arg_in.")
+            raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
+        if isinstance(arg_in, (list, tuple)):
+            return type(arg_in)(recursive_arg_to_device(arg, to_detach, depth=depth+1) for arg in arg_in)
+        elif isinstance(arg_in, torch.Tensor):
+            if need_backward and arg_in.requires_grad:
+                arg_in = deal_detach(arg_in.clone(), to_detach).to(current_device).requires_grad_()
+                temp_arg_in = arg_in * 1
+                arg_in = temp_arg_in.type_as(arg_in)
+                arg_in.retain_grad()
+                return arg_in
+            else:
+                return deal_detach(arg_in.clone(), to_detach).to(current_device)
+        else:
+            return arg_in
+
+    is_detach = api_name not in not_detach_set
+    device_args = recursive_arg_to_device(input_args, is_detach)
+    device_kwargs = \
+        {key: recursive_arg_to_device(value, key != "out" and is_detach) for key, value in input_kwargs.items()}
+    return device_args, device_kwargs
+
+
+def generate_cpu_params(input_args, input_kwargs, need_backward, api_name):
+    def recursive_arg_to_cpu(arg_in, to_detach, raise_dtype=None, depth=0):
+        if depth > Const.MAX_DEPTH:
+            logger.error("The depth of arg_in is too large, please check the arg_in.")
+            raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
+        if isinstance(arg_in, (list, tuple)):
+            return type(arg_in)(recursive_arg_to_cpu(arg, to_detach, raise_dtype=raise_dtype, depth=depth+1)
+                                for arg in arg_in)
+        elif isinstance(arg_in, torch.Tensor):
+            if need_backward and arg_in.requires_grad:
+                arg_in = deal_detach(raise_bench_data_dtype(
+                    api_name, arg_in.clone(), raise_dtype=raise_dtype), to_detach).requires_grad_()
+                temp_arg_in = arg_in * 1
+                arg_in = temp_arg_in.type_as(arg_in)
+                arg_in.retain_grad()
+                return arg_in
+            else:
+                return deal_detach(raise_bench_data_dtype(api_name, arg_in.clone(), raise_dtype=raise_dtype), to_detach)
+        else:
+            return arg_in
+
+    def is_tensor_with_raise_precision(arg_in, check_kwargs=False):
+        if arg_in.dtype in PRECISION_MAPPING:
+            return True
+        if check_kwargs and arg_in.dtype in [torch.half, torch.bfloat16]:
+            return True
+        return False
+
+    def recursive_find_dtypes(arg_in, kwargs=None, check_kwargs=False, depth=0):
+        if depth > Const.MAX_DEPTH:
+            logger.error("The depth of arg_in is too large, please check the arg_in.")
+            raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
+        if isinstance(arg_in, (list, tuple)):
+            return set().union(*tuple(recursive_find_dtypes(arg, kwargs, check_kwargs=check_kwargs, depth=depth+1) for
+                                      arg in arg_in))
+        elif isinstance(arg_in, torch.Tensor) and is_tensor_with_raise_precision(arg_in, check_kwargs):
+            return set([arg_in.dtype])
+        elif isinstance(arg_in, dict) and check_kwargs:
+            return set().union(*tuple(recursive_find_dtypes(v, kwargs, check_kwargs=True, depth=depth+1) for
+                                      v in arg_in.values()))
+        return set()
+
+    raise_dtype = None
+    need_raise_dtypes = recursive_find_dtypes(input_args)
+    need_raise_dtypes.update(recursive_find_dtypes(input_kwargs, check_kwargs=True))
+    if len(need_raise_dtypes) == 1:
+        raise_dtype = PRECISION_MAPPING.get(need_raise_dtypes.pop(), torch.float32)
+    elif len(need_raise_dtypes) >= 2:
+        raise_dtype = torch.float32
+
+    raise_dtype = None if api_name in not_raise_dtype_set else raise_dtype
+    is_detach = api_name not in not_detach_set
+    cpu_args = recursive_arg_to_cpu(input_args, is_detach, raise_dtype=raise_dtype)
+    cpu_kwargs = {key: recursive_arg_to_cpu(value, key != "out" and is_detach, raise_dtype=raise_dtype) for
+                  key, value in input_kwargs.items()}
+    return cpu_args, cpu_kwargs
+
+
+def record_skip_info(api_full_name, compare, compare_alg_results):
+    result_info = (api_full_name, CompareConst.SKIP, CompareConst.SKIP, [compare_alg_results], None, 0)
+    compare.record_results(result_info)
+
+
+def is_unsupported_api(api_name, is_overflow_check=False):
+    split_name = api_name.split(Const.SEP)[0]
+    flag = (split_name == Const.DISTRIBUTED) or (is_overflow_check and split_name == Const.NPU)
+    if flag:
+        logger.info(f"{split_name} api is not supported for run ut. SKIP.")
+    return flag
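For context, the new `generate_cpu_params`/`raise_bench_data_dtype` helpers above promote low-precision benchmark inputs to a higher-precision dtype before the CPU reference run. The standalone sketch below (illustration only, not code from the package) mimics that dtype-raising rule using the same `PRECISION_MAPPING` shown in the diff; the helper name `raise_dtype_for_benchmark` is made up for the example.

```python
# Minimal sketch, assuming only that torch is installed; mirrors the
# PRECISION_MAPPING/raise_bench_data_dtype behaviour shown in the hunk above.
import torch

PRECISION_MAPPING = {
    torch.float16: torch.float32,
    torch.bfloat16: torch.float32,
    torch.float32: torch.float64,
}


def raise_dtype_for_benchmark(arg: torch.Tensor, raise_dtype=None) -> torch.Tensor:
    # Keep the tensor unchanged unless a higher-precision benchmark dtype applies.
    if raise_dtype is None or arg.dtype not in PRECISION_MAPPING or raise_dtype == arg.dtype:
        return arg
    return arg.type(raise_dtype)


if __name__ == "__main__":
    x = torch.randn(4, dtype=torch.float16)
    # A single low-precision input is promoted to float32 for the CPU benchmark run.
    cpu_x = raise_dtype_for_benchmark(x, raise_dtype=PRECISION_MAPPING[x.dtype])
    print(cpu_x.dtype)  # torch.float32
```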
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py (identified from content)

@@ -1,7 +1,21 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import glob
 import os.path
 import time
-import re
 from multiprocessing import Queue
 from typing import Optional, Union, Dict, Any
 from dataclasses import dataclass
@@ -11,9 +25,8 @@ import torch
 from msprobe.pytorch.api_accuracy_checker.common.utils import ApiData
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.client import TCPClient
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.server import TCPServer
-from msprobe.pytorch.common.utils import logger
 from msprobe.core.common.file_utils import remove_path
-from msprobe.pytorch.common.utils import save_api_data, load_api_data,
+from msprobe.pytorch.common.utils import logger, save_api_data, load_api_data, save_pkl, load_pkl
 
 BufferType = Union[ApiData, Dict[str, Any], str]  # Union[Tensor, Tuple[Optional[Tensor]]]
 
@@ -40,7 +53,7 @@ class ATTL:
         self.dequeue_list = []
         self.message_end = False
         self.kill_progress = False
-        self.
+        self.nfs_path = None
         if self.session_config.nfs_path:
             self.nfs_path = self.session_config.nfs_path
         elif self.session_config.is_benchmark_device:
@@ -57,18 +70,6 @@ class ATTL:
                                             self.session_config.tls_path)
             self.socket_manager.start()
 
-    def check_attl_config(self):
-        if self.session_config.nfs_path:
-            if os.path.exists(self.session_config.nfs_path):
-                return
-            else:
-                raise Exception(f"nfs path {self.session_config.nfs_path} doesn't exists.")
-        ipv4_pattern = "([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5])(\.([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5])){3}$"
-        if not re.match(ipv4_pattern, self.session_config.connect_ip):
-            raise Exception(f"host {self.session_config.connect_ip} is invalid.")
-        if not (0 < self.session_config.connect_port <= 65535):
-            raise Exception(f"port {self.session_config.connect_port} is invalid.")
-
     def stop_serve(self):
         if isinstance(self.socket_manager, TCPServer):
             self.socket_manager.stop()
@@ -77,6 +78,11 @@ class ATTL:
         """
         npu major in 'send' (client)
         """
+
+        # if tcp connection lost,
+        if self.socket_manager.signal_exit:
+            raise ConnectionError(f"Failed to connect to {self.session_config.connect_ip}.")
+
         # know receiver receive and go next
         if isinstance(buffer, ApiData):
             buffer = move2target_device(buffer, torch.device('cpu'))
@@ -94,21 +100,21 @@ class ATTL:
             self.socket_manager.add_to_sending_queue(data, rank=rank, step=step)
 
     def recv(self, timeout_ms=0) -> Optional[BufferType]:
-        buffer =
-        while buffer
+        buffer = ''
+        while not buffer:
             if timeout_ms > 0:
                 time.sleep(timeout_ms / 1000.0)
-            if buffer
+            if not buffer and not self.data_queue.empty():
                 buffer = self.data_queue.get()
                 break
-            if buffer
+            if not buffer and timeout_ms > 0:  # timeout is the only case we give up and return None
                 break
             if self.message_end and self.data_queue.empty():
                 buffer = b"KILL_CONFIRM"
                 self.kill_progress = True
                 break
             time.sleep(0.1)  # waiting outside the lock before next attempt
-        if buffer
+        if not buffer:
             # this is a result of a timeout
             self.logger.info(f"RECEIVE API DATA TIMED OUT")
         else:
@@ -125,7 +131,7 @@ class ATTL:
         except Exception as e:
             self.logger.warning("there is something error. please check it. %s", e)
         if isinstance(buffer, bytes):
-            return
+            return ''
         if isinstance(buffer, str):
             return buffer
 
@@ -139,7 +145,7 @@ class ATTL:
         file_path = os.path.join(self.session_config.nfs_path, buffer + f"_{int(time.time())}")
 
         try:
-
+            save_pkl(buffer, file_path)
         except Exception as e:
             self.logger.warning("there is something error in save_pt. please check it. %s", e)
 
@@ -155,7 +161,7 @@ class ATTL:
 
         if cur_file is not None:
             try:
-                buffer =
+                buffer = load_pkl(cur_file)
             except Exception as e:
                 self.logger.warning("there is something error. please check it. %s", e)
             remove_path(cur_file)
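The `ATTL.recv` change above switches to an empty-string sentinel with explicit `not buffer` checks, plus a `KILL_CONFIRM` marker once the sender signals the end of the stream. The snippet below is a standalone sketch (not the package API) of that polling pattern, with a plain `queue.Queue` standing in for the multiprocessing data queue; `poll_queue` is a hypothetical name used only for illustration.

```python
# Minimal sketch of the recv polling pattern shown in the hunk above.
import time
from queue import Queue
from typing import Optional, Union


def poll_queue(data_queue: Queue, message_end: bool, timeout_ms: int = 0) -> Optional[Union[str, bytes]]:
    buffer = ''
    while not buffer:
        if timeout_ms > 0:
            time.sleep(timeout_ms / 1000.0)
        if not buffer and not data_queue.empty():
            buffer = data_queue.get()
            break
        if not buffer and timeout_ms > 0:  # timeout is the only case we give up
            break
        if message_end and data_queue.empty():
            buffer = b"KILL_CONFIRM"  # sender is done and the queue is drained
            break
        time.sleep(0.1)
    return buffer or None


q = Queue()
q.put("Functional.conv2d.0.forward")
print(poll_queue(q, message_end=False, timeout_ms=100))
```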
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py (identified from content)

@@ -1,10 +1,24 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import hashlib
 import io
 import struct
 import time
 import os
 import signal
-import sys
 from queue import Queue
 from threading import Thread
 from typing import Union
@@ -13,7 +27,10 @@ from twisted.internet import reactor, protocol, endpoints
 from twisted.protocols.basic import FileSender
 
 from msprobe.pytorch.common.utils import logger
-from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.
+from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.utils import STRUCT_UNPACK_MODE as unpack_mode, \
+    STR_TO_BYTES_ORDER as bytes_order
+
+MAX_SENDING_QUEUE_SIZE = 20
 
 
 class TCPDataItem:
@@ -31,7 +48,6 @@ class TCPDataItem:
 
 
 class TCPClient:
-    MAX_SENDING_QUEUE_SIZE = 20
     ACK_SUCCESS = b"OK___"
     ACK_ERROR = b"ERROR"
     ACK_BUSY = b"BUSY_"
@@ -39,13 +55,13 @@ class TCPClient:
     ACK_STOP_CONFIRM = b"OVER_"
     ACK_KILL_PROCESS = b"KILL_"
 
-    QUEUE_PENDING_TIME =
+    QUEUE_PENDING_TIME = 60
     RESEND_RETRY_TIMES = 2  # maximum number of retransmissions
     RESEND_TIMER_TIME = 5  # timer for ACK receive timeout
    RESEND_PENDING_TIME = 60  # give up the data if it stays pending for more than one minute
 
     def __init__(self, host="localhost", port=8000, check_sum=False, tls_path=None):
-        self.send_queue = Queue(
+        self.send_queue = Queue(MAX_SENDING_QUEUE_SIZE)
         self.resend_dict = dict()
         self.host = host
         self.port = port
@@ -55,7 +71,8 @@ class TCPClient:
         self.signal_exit = False
         self.tcp_manager = ClientProtocol(ack_queue_size=100,
                                           chunk_size=655360,
-                                          check_sum=check_sum
+                                          check_sum=check_sum,
+                                          tls=self.tls_path)
         self.send_thread = Thread(target=self._sending_queue_data)
         self.send_thread.setDaemon(True)
         self.send_thread.start()
@@ -80,8 +97,6 @@ class TCPClient:
             time.sleep(1)
             reactor.stop()
             logger.error(f"Failed to connected {self.host} {self.port}. Reason is {failure.getErrorMessage()}")
-            os.kill(os.getpid(), signal.SIGKILL)
-            os.kill(os.getppid(), signal.SIGKILL)
 
         def cur_protocol():
             return self.tcp_manager
@@ -89,14 +104,10 @@ class TCPClient:
         self.factory = MessageClientFactory()
         self.factory.protocol = cur_protocol
         if self.tls_path:
-            from OpenSSL import SSL
             from twisted.internet import ssl
             client_key = os.path.join(self.tls_path, "client.key")
             client_crt = os.path.join(self.tls_path, "client.crt")
-            client_context_factory = ssl.DefaultOpenSSLContextFactory(client_key, client_crt
-            client_context_ = client_context_factory.getContext()
-            client_context_.set_cipher_list(cipher_list)
-            client_context_.set_options(SSL.OP_NO_RENEGOTIATION)
+            client_context_factory = ssl.DefaultOpenSSLContextFactory(client_key, client_crt)
             endpoint = endpoints.SSL4ClientEndpoint(reactor, self.host, self.port, client_context_factory)
         else:
             endpoint = endpoints.TCP4ClientEndpoint(reactor, self.host, self.port)
@@ -109,7 +120,11 @@ class TCPClient:
 
     def send_after_queue_empty(self, data):
         while not self._ready_to_exit():
-            self.
+            if not self.tls_path:
+                self.add_to_sending_queue(data)
+            else:
+                for _ in range(MAX_SENDING_QUEUE_SIZE):
+                    self.add_to_sending_queue(data)
             time.sleep(2)
 
     def check_client_alive(self):
@@ -124,8 +139,6 @@ class TCPClient:
            if not self.check_client_alive():
                break
            time.sleep(1)
-        while not self.tcp_manager.kill_process:
-            time.sleep(1)
 
     def add_to_sending_queue(self, data: Union[bytes, TCPDataItem], rank: int = 0, step: int = 0):
         if self._ready_to_exit():
@@ -142,7 +155,8 @@ class TCPClient:
             self.send_queue.put(send_data, block=True, timeout=self.QUEUE_PENDING_TIME)
         except Exception as e:
             logger.error(f"send_queue put send_data timeout, rank: {send_data.rank}, step: {send_data.step},"
-                         f"sequence_number: {send_data.sequence_number}, {
+                         f"sequence_number: {send_data.sequence_number}, send_queue size: {self.send_queue.qsize()},"
+                         f"{str(e)}")
 
     def _send_data(self, data: TCPDataItem):
         self.tcp_manager.send_wrapped_data(data.raw_data,
@@ -159,10 +173,11 @@ class TCPClient:
         while self.send_queue.qsize() > 0:
             if self._ready_to_exit():
                 break
-            if len(self.resend_dict) <
+            if len(self.resend_dict) < MAX_SENDING_QUEUE_SIZE:
                 data_obj = self.send_queue.get()
-                self._send_data(data_obj)
                 resend_key = str(data_obj.sequence_number) + "_" + str(data_obj.rank) + "_" + str(data_obj.step)
+                logger.debug(f"get {resend_key} from send_queue, and send to server.")
+                self._send_data(data_obj)
                 if resend_key not in self.resend_dict.keys():
                     # Send data for the first time
                     self.resend_dict[resend_key] = data_obj
@@ -233,7 +248,7 @@ class TCPClient:
 class ClientProtocol(protocol.Protocol):
     TIMEOUT = 60 * 10
 
-    def __init__(self, ack_queue_size=100, chunk_size=65536, check_sum=False):
+    def __init__(self, ack_queue_size=100, chunk_size=65536, check_sum=False, tls=None):
         self.buffer = io.BytesIO()
         self.is_connected = False
         self.check_sum = check_sum
@@ -244,6 +259,13 @@ class ClientProtocol(protocol.Protocol):
         self.signal_exit = False
         self.defer = None
         self.kill_process = False
+        self.ack = None
+
+        self.timeout_call = None
+
+        self.tls = tls
+        self.send_buffer = b""
+        self.buffer_cnt = 0
 
     def dataReceived(self, data):
         if self.timeout_call.active():
@@ -255,9 +277,11 @@ class ClientProtocol(protocol.Protocol):
         while True:
             if len(self.buffer.getvalue()) >= 29:  # 5 + 8 * 3
                 ack = self.buffer.read(5)
-
-
-
+                self.ack = ack
+                seq_number = struct.unpack(unpack_mode, self.buffer.read(8))[0]
+                rank = struct.unpack(unpack_mode, self.buffer.read(8))[0]
+                step = struct.unpack(unpack_mode, self.buffer.read(8))[0]
+                logger.debug(f"receive sequence number: {seq_number}; RANK: {rank}; STEP: {step}; ACK: {ack}")
                 if ack == b"KILL_":
                     self.kill_process = True
                     logger.debug(f"KILL signal received, PID {os.getpid()}")
@@ -276,20 +300,33 @@ class ClientProtocol(protocol.Protocol):
     def send_wrapped_data(self, data, sequence_number: int = 0, rank: int = 0, step: int = 0):
         length = len(data)
         md5_hash = hashlib.md5(data).hexdigest() if self.check_sum else ""
+        data_meaasge = length.to_bytes(8, byteorder=bytes_order) + \
+                       sequence_number.to_bytes(8, byteorder=bytes_order) + \
+                       rank.to_bytes(8, byteorder=bytes_order) + \
+                       step.to_bytes(8, byteorder=bytes_order) + \
+                       md5_hash.encode() + \
+                       data
+        logger.debug(f"send sequence number: {sequence_number}; RANK: {rank}; STEP: {step}; LENGTH: {length}")
+
         while True:
             if self.defer is None or self.defer.called:
-                self.defer = self.send_large_data(
-                    length.to_bytes(8, byteorder='big') +
-                    sequence_number.to_bytes(8, byteorder='big') +
-                    rank.to_bytes(8, byteorder='big') +
-                    step.to_bytes(8, byteorder='big') +
-                    md5_hash.encode() +
-                    data)
+                self.defer = self.send_large_data(data_meaasge)
                 break
             time.sleep(0.01)
 
     def send_large_data(self, data):
-
+
+        if self.tls:
+            self.send_buffer += data
+            self.buffer_cnt += 1
+            if self.buffer_cnt >= MAX_SENDING_QUEUE_SIZE:
+                d = self.file_sender.beginFileTransfer(io.BytesIO(self.send_buffer), self.transport)
+                self.send_buffer = b""
+                self.buffer_cnt = 0
+            else:
+                d = None
+        else:
+            d = self.file_sender.beginFileTransfer(io.BytesIO(data), self.transport)
         return d
 
     def connection_timeout(self):
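The client changes above frame every outgoing message as four 8-byte integers (payload length, sequence number, rank, step) followed by an optional MD5 digest and the payload, and parse 29-byte ACK headers (a 5-byte ack code plus three 8-byte integers). The sketch below (illustration only, not the package API) shows that framing with plain `struct`/`to_bytes`; the byte order is assumed big-endian here, matching the removed `byteorder='big'` calls, whereas the package reads it from `STR_TO_BYTES_ORDER`/`STRUCT_UNPACK_MODE` in its utils module.

```python
# Minimal sketch of the message and ACK framing implied by the diff above.
import hashlib
import struct

BYTES_ORDER = "big"
UNPACK_MODE = ">Q"  # assumption: one unsigned 64-bit big-endian integer per field


def wrap_data(data: bytes, sequence_number: int, rank: int, step: int, check_sum: bool = True) -> bytes:
    # length | sequence | rank | step (8 bytes each), then optional md5 digest, then payload.
    md5_hash = hashlib.md5(data).hexdigest() if check_sum else ""
    return (len(data).to_bytes(8, byteorder=BYTES_ORDER)
            + sequence_number.to_bytes(8, byteorder=BYTES_ORDER)
            + rank.to_bytes(8, byteorder=BYTES_ORDER)
            + step.to_bytes(8, byteorder=BYTES_ORDER)
            + md5_hash.encode()
            + data)


def parse_ack(header: bytes):
    # 29 bytes: 5-byte ack code, then sequence number, rank and step.
    ack = header[:5]
    seq, rank, step = (struct.unpack(UNPACK_MODE, header[5 + 8 * i:5 + 8 * (i + 1)])[0] for i in range(3))
    return ack, seq, rank, step


msg = wrap_data(b"tensor-bytes", sequence_number=7, rank=0, step=3)
print(parse_ack(b"OK___" + msg[8:32]))  # (b'OK___', 7, 0, 3)
```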
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py (identified from content)

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import time
 from collections import namedtuple
 
@@ -12,6 +27,8 @@ from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TEST_ROWS
 from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
 from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
+from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params
+
 
 # NPU vs GPU api list
 CompareApi = set(absolute_standard_api) | set(binary_standard_api) | set(thousandth_standard_api)
@@ -75,7 +92,8 @@ def online_precision_compare(api_data, device, common_config, api_precision_csv_file):
 
     try:
         # NPU vs CPU
-
+        cpu_args, cpu_kwargs = generate_cpu_params(npu_args, npu_kwargs, False, api_name)
+        cpu_out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs)
         npu_data_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
         npu_detail = compare.compare_output(api_full_name, npu_data_info, True)
         npu_data = pd.DataFrame(npu_detail, columns=DETAIL_TEST_ROWS[-1])
@@ -156,7 +174,10 @@ class ConsumerDispatcher:
 
     def start(self, handle_func, config):
         self.queues = [mp.Queue(maxsize=self.capacity) for _ in range(self.num_workers)]
-        api_precision_csv_file = [
+        api_precision_csv_file = [
+            ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME,
+            ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME
+        ]
         common_config = CommonCompareConfig(self.compare, handle_func, config)
         for xpu_id, q in enumerate(self.queues):
             p = mp.Process(name="run_ut_process", target=run_ut_process,
@@ -164,8 +185,10 @@ class ConsumerDispatcher:
 
             p.start()
             self.processes.append(p)
-        logger.info(
-
+        logger.info(
+            f'Api_precision_compare task result will be saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}')
+        logger.info(
+            f"Api_precision_compare task details will be saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
         logger.info("Successfully start unittest process.")
 
     def stop(self):