PyPI - mindstudio-probe - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.0.4 (py3-none-any.whl) → 1.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194)

{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +1 -1
mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
msprobe/README.md +46 -16
msprobe/__init__.py +16 -1
msprobe/config.json +0 -2
msprobe/core/advisor/advisor.py +8 -8
msprobe/core/advisor/advisor_const.py +6 -7
msprobe/core/advisor/advisor_result.py +12 -12
msprobe/core/common/const.py +64 -3
msprobe/core/common/exceptions.py +2 -2
msprobe/core/common/file_utils.py +54 -9
msprobe/core/common/inplace_op_checker.py +38 -0
msprobe/core/common/inplace_ops.yaml +251 -0
msprobe/core/common/log.py +21 -11
msprobe/core/common/utils.py +153 -167
msprobe/core/common_config.py +18 -25
msprobe/core/compare/acc_compare.py +209 -36
msprobe/core/compare/check.py +102 -17
msprobe/core/compare/compare_cli.py +21 -1
msprobe/core/compare/highlight.py +41 -5
msprobe/core/compare/multiprocessing_compute.py +33 -8
msprobe/core/compare/npy_compare.py +21 -6
msprobe/core/compare/utils.py +82 -48
msprobe/core/data_dump/data_collector.py +31 -32
msprobe/core/data_dump/data_processor/base.py +45 -22
msprobe/core/data_dump/data_processor/factory.py +20 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +11 -5
msprobe/core/data_dump/data_processor/pytorch_processor.py +24 -7
msprobe/core/data_dump/json_writer.py +63 -42
msprobe/core/data_dump/scope.py +32 -16
msprobe/core/grad_probe/constant.py +4 -0
msprobe/core/grad_probe/grad_compare.py +2 -3
msprobe/core/grad_probe/utils.py +16 -3
msprobe/docs/01.installation.md +19 -9
msprobe/docs/02.config_introduction.md +52 -80
msprobe/docs/03.config_examples.md +3 -13
msprobe/docs/04.acl_config_examples.md +11 -9
msprobe/docs/05.data_dump_PyTorch.md +140 -12
msprobe/docs/06.data_dump_MindSpore.md +47 -5
msprobe/docs/07.accuracy_checker_PyTorch.md +57 -34
msprobe/docs/08.accuracy_checker_online_PyTorch.md +51 -11
msprobe/docs/09.accuracy_checker_MindSpore.md +8 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +181 -99
msprobe/docs/11.accuracy_compare_MindSpore.md +162 -31
msprobe/docs/13.overflow_check_MindSpore.md +1 -1
msprobe/docs/15.free_benchmarking_PyTorch.md +59 -53
msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
msprobe/docs/17.grad_probe.md +14 -16
msprobe/docs/18.online_dispatch.md +89 -0
msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +22 -10
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/mindspore/__init__.py +1 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +35 -11
msprobe/mindspore/api_accuracy_checker/api_info.py +7 -0
msprobe/mindspore/cell_processor.py +27 -3
msprobe/mindspore/common/const.py +2 -0
msprobe/mindspore/common/utils.py +18 -2
msprobe/mindspore/compare/distributed_compare.py +9 -22
msprobe/mindspore/compare/layer_mapping.py +146 -0
msprobe/mindspore/compare/modify_mapping.py +107 -0
msprobe/mindspore/compare/ms_compare.py +173 -35
msprobe/mindspore/compare/ms_graph_compare.py +27 -11
msprobe/mindspore/debugger/debugger_config.py +16 -13
msprobe/mindspore/debugger/precision_debugger.py +37 -13
msprobe/mindspore/dump/dump_tool_factory.py +16 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +11 -1
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +82 -10
msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
msprobe/mindspore/dump/jit_dump.py +41 -17
msprobe/mindspore/dump/kernel_graph_dump.py +19 -3
msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -4
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +19 -4
msprobe/mindspore/free_benchmark/common/config.py +15 -0
msprobe/mindspore/free_benchmark/common/handler_params.py +15 -0
msprobe/mindspore/free_benchmark/common/utils.py +19 -5
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +16 -2
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +18 -3
msprobe/mindspore/free_benchmark/handler/base_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/fix_handler.py +15 -0
msprobe/mindspore/free_benchmark/handler/handler_factory.py +18 -3
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +22 -7
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -0
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +22 -7
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +44 -18
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +18 -4
msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +20 -5
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +15 -0
msprobe/mindspore/grad_probe/global_context.py +18 -8
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -4
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
msprobe/mindspore/service.py +42 -123
msprobe/pytorch/__init__.py +20 -1
msprobe/pytorch/api_accuracy_checker/common/config.py +19 -2
msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +47 -21
msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +67 -32
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +26 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +19 -2
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +51 -125
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +146 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +21 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +78 -33
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +36 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
msprobe/pytorch/bench_functions/__init__.py +18 -3
msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
msprobe/pytorch/bench_functions/confusion_transpose.py +15 -0
msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
msprobe/pytorch/bench_functions/linear.py +15 -0
msprobe/pytorch/bench_functions/matmul_backward.py +21 -6
msprobe/pytorch/bench_functions/npu_fusion_attention.py +180 -151
msprobe/pytorch/bench_functions/rms_norm.py +15 -0
msprobe/pytorch/bench_functions/rotary_mul.py +28 -9
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
msprobe/pytorch/bench_functions/swiglu.py +20 -5
msprobe/pytorch/common/__init__.py +15 -0
msprobe/pytorch/common/log.py +18 -6
msprobe/pytorch/common/parse_json.py +26 -11
msprobe/pytorch/common/utils.py +40 -35
msprobe/pytorch/compare/distributed_compare.py +11 -11
msprobe/pytorch/compare/match.py +15 -0
msprobe/pytorch/compare/pt_compare.py +38 -6
msprobe/pytorch/debugger/debugger_config.py +52 -39
msprobe/pytorch/debugger/precision_debugger.py +72 -24
msprobe/pytorch/free_benchmark/__init__.py +20 -5
msprobe/pytorch/free_benchmark/common/enums.py +28 -0
msprobe/pytorch/free_benchmark/common/params.py +15 -0
msprobe/pytorch/free_benchmark/common/utils.py +17 -1
msprobe/pytorch/free_benchmark/compare/grad_saver.py +28 -7
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +15 -0
msprobe/pytorch/free_benchmark/main.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +26 -2
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +55 -16
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
msprobe/pytorch/function_factory.py +17 -2
msprobe/pytorch/functional/module_dump.py +84 -0
msprobe/pytorch/grad_probe/grad_stat_csv.py +2 -2
msprobe/pytorch/hook_module/__init__.py +16 -1
msprobe/pytorch/hook_module/api_registry.py +13 -8
msprobe/pytorch/hook_module/hook_module.py +17 -19
msprobe/pytorch/hook_module/utils.py +4 -6
msprobe/pytorch/hook_module/wrap_aten.py +12 -11
msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
msprobe/pytorch/hook_module/wrap_functional.py +10 -11
msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
msprobe/pytorch/hook_module/wrap_torch.py +4 -6
msprobe/pytorch/hook_module/wrap_vf.py +4 -6
msprobe/pytorch/module_processer.py +17 -2
msprobe/pytorch/online_dispatch/compare.py +11 -12
msprobe/pytorch/online_dispatch/single_compare.py +7 -7
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +8 -0
msprobe/pytorch/online_dispatch/utils.py +1 -4
msprobe/pytorch/parse.py +15 -0
msprobe/pytorch/parse_tool/cli.py +5 -6
msprobe/pytorch/parse_tool/lib/compare.py +9 -10
msprobe/pytorch/parse_tool/lib/parse_tool.py +3 -0
msprobe/pytorch/parse_tool/lib/utils.py +28 -24
msprobe/pytorch/parse_tool/lib/visualization.py +1 -1
msprobe/pytorch/pt_config.py +167 -38
msprobe/pytorch/service.py +97 -32
mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
msprobe/pytorch/functional/data_processor.py +0 -0
msprobe/pytorch/functional/dump_module.py +0 -39
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0

msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py CHANGED Viewed

@@ -1,8 +1,35 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import os
 import re
+import torch
+try:
+    import torch_npu
+except ImportError:
+    current_device = "cuda"
+else:
+    current_device = "npu"
-from msprobe.core.common.const import FileCheckConst
+from msprobe.core.common.const import FileCheckConst, Const, CompareConst
 from msprobe.core.common.file_utils import FileChecker
+from msprobe.core.common.log import logger
+from msprobe.core.common.utils import CompareException
 from msprobe.pytorch.hook_module.wrap_aten import AtenOPTemplate
 from msprobe.pytorch.hook_module.wrap_functional import FunctionalOPTemplate
 from msprobe.pytorch.hook_module.wrap_npu_custom import NpuOPTemplate
@@ -10,11 +37,20 @@ from msprobe.pytorch.hook_module.wrap_tensor import TensorOPTemplate
 from msprobe.pytorch.hook_module.wrap_torch import TorchOPTemplate
 hf_32_standard_api = ["conv1d", "conv2d"]
+not_detach_set = {'resize_', 'resize_as_', 'set_', 'transpose_', 't_', 'squeeze_', 'unsqueeze_'}
+not_raise_dtype_set = {'type_as'}
+PRECISION_MAPPING = {
+    torch.float16: torch.float32,
+    torch.bfloat16: torch.float32,
+    torch.float32: torch.float64
+}
-class Backward_Message:
+class BackwardMessage:
     MULTIPLE_BACKWARD_MESSAGE = "Multiple backward is not supported."
-    UNSUPPORT_BACKWARD_MESSAGE = "function with out=... arguments don't support automatic differentiation, skip backward."
+    UNSUPPORT_BACKWARD_MESSAGE = "function with out=... arguments don't support automatic differentiation, " \
+                                  "skip backward."
     NO_BACKWARD_RESULT_MESSAGE = "function backward result is None, skip backward."
@@ -68,3 +104,110 @@ def exec_api(api_type, api_name, device, args, kwargs):
         torch_api = NpuOPTemplate(api_name, None, False, device)
     out = torch_api.forward(*args, **kwargs)
     return out
+def deal_detach(arg, to_detach=True):
+    return arg.detach() if to_detach else arg
+def raise_bench_data_dtype(api_name, arg, raise_dtype=None):
+    '''
+    将标杆数据的dtype转换为raise_dtype
+    输入：
+        api_name：api名称
+        arg：标杆输入
+        raise_dtype：需要转换的dtype
+    输出：
+        arg: 转换dtype的标杆输入
+    '''
+    if api_name in hf_32_standard_api and arg.dtype == torch.float32:
+        return arg
+    if raise_dtype is None or arg.dtype not in PRECISION_MAPPING or raise_dtype == arg.dtype:
+        return arg
+    return arg.type(raise_dtype)
+def generate_device_params(input_args, input_kwargs, need_backward, api_name):
+    def recursive_arg_to_device(arg_in, to_detach, depth=0):
+        if depth > Const.MAX_DEPTH:
+            logger.error("The depth of arg_in is too large, please check the arg_in.")
+            raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
+        if isinstance(arg_in, (list, tuple)):
+            return type(arg_in)(recursive_arg_to_device(arg, to_detach, depth=depth+1) for arg in arg_in)
+        elif isinstance(arg_in, torch.Tensor):
+            if need_backward and arg_in.requires_grad:
+                arg_in = deal_detach(arg_in.clone(), to_detach).to(current_device).requires_grad_()
+                temp_arg_in = arg_in * 1
+                arg_in = temp_arg_in.type_as(arg_in)
+                arg_in.retain_grad()
+                return arg_in
+            else:
+                return deal_detach(arg_in.clone(), to_detach).to(current_device)
+        else:
+            return arg_in
+    is_detach = api_name not in not_detach_set
+    device_args = recursive_arg_to_device(input_args, is_detach)
+    device_kwargs = \
+        {key: recursive_arg_to_device(value, key != "out" and is_detach) for key, value in input_kwargs.items()}
+    return device_args, device_kwargs
+def generate_cpu_params(input_args, input_kwargs, need_backward, api_name):
+    def recursive_arg_to_cpu(arg_in, to_detach, raise_dtype=None, depth=0):
+        if depth > Const.MAX_DEPTH:
+            logger.error("The depth of arg_in is too large, please check the arg_in.")
+            raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
+        if isinstance(arg_in, (list, tuple)):
+            return type(arg_in)(recursive_arg_to_cpu(arg, to_detach, raise_dtype=raise_dtype, depth=depth+1)
+                                for arg in arg_in)
+        elif isinstance(arg_in, torch.Tensor):
+            if need_backward and arg_in.requires_grad:
+                arg_in = deal_detach(raise_bench_data_dtype(
+                                     api_name, arg_in.clone(), raise_dtype=raise_dtype), to_detach).requires_grad_()
+                temp_arg_in = arg_in * 1
+                arg_in = temp_arg_in.type_as(arg_in)
+                arg_in.retain_grad()
+                return arg_in
+            else:
+                return deal_detach(raise_bench_data_dtype(api_name, arg_in.clone(), raise_dtype=raise_dtype), to_detach)
+        else:
+            return arg_in
+    def is_tensor_with_raise_precision(arg_in, check_kwargs=False):
+        if arg_in.dtype in PRECISION_MAPPING:
+            return True
+        if check_kwargs and arg_in.dtype in [torch.half, torch.bfloat16]:
+            return True
+        return False
+    def recursive_find_dtypes(arg_in, kwargs=None, check_kwargs=False, depth=0):
+        if depth > Const.MAX_DEPTH:
+            logger.error("The depth of arg_in is too large, please check the arg_in.")
+            raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
+        if isinstance(arg_in, (list, tuple)):
+            return set().union(*tuple(recursive_find_dtypes(arg, kwargs, check_kwargs=check_kwargs, depth=depth+1) for arg in arg_in))
+        elif isinstance(arg_in, torch.Tensor) and is_tensor_with_raise_precision(arg_in, check_kwargs):
+            return set([arg_in.dtype])
+        elif isinstance(arg_in, dict) and check_kwargs:
+            return set().union(*tuple(recursive_find_dtypes(v, kwargs, check_kwargs=True, depth=depth+1) for v in arg_in.values()))
+        return set()
+    raise_dtype = None
+    need_raise_dtypes = recursive_find_dtypes(input_args)
+    need_raise_dtypes.update(recursive_find_dtypes(input_kwargs, check_kwargs=True))
+    if len(need_raise_dtypes) == 1:
+        raise_dtype = PRECISION_MAPPING.get(need_raise_dtypes.pop(), torch.float32)
+    elif len(need_raise_dtypes) >= 2:
+        raise_dtype = torch.float32
+    raise_dtype = None if api_name in not_raise_dtype_set else raise_dtype
+    is_detach = api_name not in not_detach_set
+    cpu_args = recursive_arg_to_cpu(input_args, is_detach, raise_dtype=raise_dtype)
+    cpu_kwargs = {key: recursive_arg_to_cpu(value, key != "out" and is_detach, raise_dtype=raise_dtype) for key, value in input_kwargs.items()}
+    return cpu_args, cpu_kwargs
+def record_skip_info(api_full_name, compare, compare_alg_results):
+    result_info = (api_full_name, CompareConst.SKIP, CompareConst.SKIP, [compare_alg_results], None, 0)
+    compare.record_results(result_info)

msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import glob
 import os.path
 import time
@@ -41,6 +56,7 @@ class ATTL:
         self.message_end = False
         self.kill_progress = False
         self.check_attl_config()
+        self.nfs_path = None
         if self.session_config.nfs_path:
             self.nfs_path = self.session_config.nfs_path
         elif self.session_config.is_benchmark_device:
@@ -77,6 +93,11 @@ class ATTL:
         """
         npu major in 'send' (client)
         """
+        # if tcp connection lost,
+        if self.socket_manager.signal_exit:
+            raise ConnectionError(f"Failed to connect to {self.session_config.connect_ip}.")
         # know receiver receive and go next
         if isinstance(buffer, ApiData):
             buffer = move2target_device(buffer, torch.device('cpu'))

msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py CHANGED Viewed

@@ -1,10 +1,24 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import hashlib
 import io
 import struct
 import time
 import os
 import signal
-import sys
 from queue import Queue
 from threading import Thread
 from typing import Union
@@ -13,7 +27,10 @@ from twisted.internet import reactor, protocol, endpoints
 from twisted.protocols.basic import FileSender
 from msprobe.pytorch.common.utils import logger
-from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.ssl_config import cipher_list
+from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.utils import struct_unpack_mode as unpack_mode, \
+    str_to_bytes_order as bytes_order
+MAX_SENDING_QUEUE_SIZE = 20
 class TCPDataItem:
@@ -31,7 +48,6 @@ class TCPDataItem:
 class TCPClient:
-    MAX_SENDING_QUEUE_SIZE = 20
     ACK_SUCCESS = b"OK___"
     ACK_ERROR = b"ERROR"
     ACK_BUSY = b"BUSY_"
@@ -39,13 +55,13 @@ class TCPClient:
     ACK_STOP_CONFIRM = b"OVER_"
     ACK_KILL_PROCESS = b"KILL_"
-    QUEUE_PENDING_TIME = 600  # 队列10分钟都处于阻塞状态，则终止sending进程
+    QUEUE_PENDING_TIME = 60
     RESEND_RETRY_TIMES = 2  # 最大重传数
     RESEND_TIMER_TIME = 5  # 接收ACK超时定时器
     RESEND_PENDING_TIME = 60  # 连续pending时间超过1分钟则放弃该数据
     def __init__(self, host="localhost", port=8000, check_sum=False, tls_path=None):
-        self.send_queue = Queue(self.MAX_SENDING_QUEUE_SIZE)
+        self.send_queue = Queue(MAX_SENDING_QUEUE_SIZE)
         self.resend_dict = dict()
         self.host = host
         self.port = port
@@ -55,7 +71,8 @@ class TCPClient:
         self.signal_exit = False
         self.tcp_manager = ClientProtocol(ack_queue_size=100,
                                           chunk_size=655360,
-                                          check_sum=check_sum)
+                                          check_sum=check_sum,
+                                          tls=self.tls_path)
         self.send_thread = Thread(target=self._sending_queue_data)
         self.send_thread.setDaemon(True)
         self.send_thread.start()
@@ -67,6 +84,15 @@ class TCPClient:
     def run_reactor():
         reactor.run(installSignalHandlers=False)
+    def check_tls_path(self):
+        client_key = os.path.join(self.tls_path, "client.key")
+        client_crt = os.path.join(self.tls_path, "client.crt")
+        if not os.path.exists(client_key):
+            raise Exception(f"client_key: {client_key} is not exists.")
+        if not os.path.exists(client_crt):
+            raise Exception(f"client_crt: {client_crt} is not exists.")
+        return client_key, client_crt
     def start(self):
         def conn_callback(cur_protocol):
             if cur_protocol.transport and cur_protocol.transport.getPeer().host == self.host:
@@ -80,8 +106,6 @@ class TCPClient:
             time.sleep(1)
             reactor.stop()
             logger.error(f"Failed to connected {self.host} {self.port}. Reason is {failure.getErrorMessage()}")
-            os.kill(os.getpid(), signal.SIGKILL)
-            os.kill(os.getppid(), signal.SIGKILL)
         def cur_protocol():
             return self.tcp_manager
@@ -89,14 +113,9 @@ class TCPClient:
         self.factory = MessageClientFactory()
         self.factory.protocol = cur_protocol
         if self.tls_path:
-            from OpenSSL import SSL
             from twisted.internet import ssl
-            client_key = os.path.join(self.tls_path, "client.key")
-            client_crt = os.path.join(self.tls_path, "client.crt")
-            client_context_factory = ssl.DefaultOpenSSLContextFactory(client_key, client_crt, SSL.TLSv1_2_METHOD)
-            client_context_ = client_context_factory.getContext()
-            client_context_.set_cipher_list(cipher_list)
-            client_context_.set_options(SSL.OP_NO_RENEGOTIATION)
+            client_key, client_crt = self.check_tls_path()
+            client_context_factory = ssl.DefaultOpenSSLContextFactory(client_key, client_crt)
             endpoint = endpoints.SSL4ClientEndpoint(reactor, self.host, self.port, client_context_factory)
         else:
             endpoint = endpoints.TCP4ClientEndpoint(reactor, self.host, self.port)
@@ -109,7 +128,11 @@ class TCPClient:
     def send_after_queue_empty(self, data):
         while not self._ready_to_exit():
-            self.add_to_sending_queue(data)
+            if not self.tls_path:
+                self.add_to_sending_queue(data)
+            else:
+                for _ in range(MAX_SENDING_QUEUE_SIZE):
+                    self.add_to_sending_queue(data)
             time.sleep(2)
     def check_client_alive(self):
@@ -124,8 +147,6 @@ class TCPClient:
             if not self.check_client_alive():
                 break
             time.sleep(1)
-        while not self.tcp_manager.kill_process:
-            time.sleep(1)
     def add_to_sending_queue(self, data: Union[bytes, TCPDataItem], rank: int = 0, step: int = 0):
         if self._ready_to_exit():
@@ -142,7 +163,8 @@ class TCPClient:
             self.send_queue.put(send_data, block=True, timeout=self.QUEUE_PENDING_TIME)
         except Exception as e:
             logger.error(f"send_queue put send_data timeout, rank: {send_data.rank}, step: {send_data.step},"
-                         f"sequence_number: {send_data.sequence_number}, {str(e)}")
+                         f"sequence_number: {send_data.sequence_number}, send_queue size: {self.send_queue.qsize()},"
+                         f"{str(e)}")
     def _send_data(self, data: TCPDataItem):
         self.tcp_manager.send_wrapped_data(data.raw_data,
@@ -159,10 +181,11 @@ class TCPClient:
             while self.send_queue.qsize() > 0:
                 if self._ready_to_exit():
                     break
-                if len(self.resend_dict) < self.MAX_SENDING_QUEUE_SIZE:
+                if len(self.resend_dict) < MAX_SENDING_QUEUE_SIZE:
                     data_obj = self.send_queue.get()
-                    self._send_data(data_obj)
                     resend_key = str(data_obj.sequence_number) + "_" + str(data_obj.rank) + "_" + str(data_obj.step)
+                    logger.debug(f"get {resend_key} from send_queue, and send to server.")
+                    self._send_data(data_obj)
                     if resend_key not in self.resend_dict.keys():
                         # Send data for the first time
                         self.resend_dict[resend_key] = data_obj
@@ -233,7 +256,7 @@ class TCPClient:
 class ClientProtocol(protocol.Protocol):
     TIMEOUT = 60 * 10
-    def __init__(self, ack_queue_size=100, chunk_size=65536, check_sum=False):
+    def __init__(self, ack_queue_size=100, chunk_size=65536, check_sum=False, tls=None):
         self.buffer = io.BytesIO()
         self.is_connected = False
         self.check_sum = check_sum
@@ -244,6 +267,13 @@ class ClientProtocol(protocol.Protocol):
         self.signal_exit = False
         self.defer = None
         self.kill_process = False
+        self.ack = None
+        self.timeout_call = None
+        self.tls = tls
+        self.send_buffer = b""
+        self.buffer_cnt = 0
     def dataReceived(self, data):
         if self.timeout_call.active():
@@ -255,9 +285,11 @@ class ClientProtocol(protocol.Protocol):
         while True:
             if len(self.buffer.getvalue()) >= 29:  # 5 + 8 * 3
                 ack = self.buffer.read(5)
-                seq_number = struct.unpack('!Q', self.buffer.read(8))[0]
-                rank = struct.unpack('!Q', self.buffer.read(8))[0]
-                step = struct.unpack('!Q', self.buffer.read(8))[0]
+                self.ack = ack
+                seq_number = struct.unpack(unpack_mode, self.buffer.read(8))[0]
+                rank = struct.unpack(unpack_mode, self.buffer.read(8))[0]
+                step = struct.unpack(unpack_mode, self.buffer.read(8))[0]
+                logger.debug(f"receive 流水号: {seq_number}; RANK: {rank}; STEP: {step}; ACK: {ack}")
                 if ack == b"KILL_":
                     self.kill_process = True
                     logger.debug(f"接收到KILL信号, PID {os.getpid()}")
@@ -276,20 +308,33 @@ class ClientProtocol(protocol.Protocol):
     def send_wrapped_data(self, data, sequence_number: int = 0, rank: int = 0, step: int = 0):
         length = len(data)
         md5_hash = hashlib.md5(data).hexdigest() if self.check_sum else ""
+        data_meaasge = length.to_bytes(8, byteorder=bytes_order) + \
+                       sequence_number.to_bytes(8, byteorder=bytes_order) + \
+                       rank.to_bytes(8, byteorder=bytes_order) + \
+                       step.to_bytes(8, byteorder=bytes_order) + \
+                       md5_hash.encode() + \
+                       data
+        logger.debug(f"send 流水号: {sequence_number}; RANK: {rank}; STEP: {step}; LENGTH: {length}")
         while True:
             if self.defer is None or self.defer.called:
-                self.defer = self.send_large_data(
-                    length.to_bytes(8, byteorder='big') +
-                    sequence_number.to_bytes(8, byteorder='big') +
-                    rank.to_bytes(8, byteorder='big') +
-                    step.to_bytes(8, byteorder='big') +
-                    md5_hash.encode() +
-                    data)
+                self.defer = self.send_large_data(data_meaasge)
                 break
             time.sleep(0.01)
     def send_large_data(self, data):
-        d = self.file_sender.beginFileTransfer(io.BytesIO(data), self.transport)
+        if self.tls:
+            self.send_buffer += data
+            self.buffer_cnt += 1
+            if self.buffer_cnt >= MAX_SENDING_QUEUE_SIZE:
+                d = self.file_sender.beginFileTransfer(io.BytesIO(self.send_buffer), self.transport)
+                self.send_buffer = b""
+                self.buffer_cnt = 0
+            else:
+                d = None
+        else:
+            d = self.file_sender.beginFileTransfer(io.BytesIO(data), self.transport)
         return d
     def connection_timeout(self):

msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import time
 from collections import namedtuple
@@ -12,6 +27,8 @@ from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TE
 from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
 from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
+from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params
 # NPU vs GPU api list
 CompareApi = set(absolute_standard_api) | set(binary_standard_api) | set(thousandth_standard_api)
@@ -75,7 +92,8 @@ def online_precision_compare(api_data, device, common_config, api_precision_csv_
     try:
         # NPU vs CPU
-        cpu_out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, npu_args, npu_kwargs)
+        cpu_args, cpu_kwargs = generate_cpu_params(npu_args, npu_kwargs, False, api_name)
+        cpu_out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs)
         npu_data_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
         npu_detail = compare.compare_output(api_full_name, npu_data_info, True)
         npu_data = pd.DataFrame(npu_detail, columns=DETAIL_TEST_ROWS[-1])
@@ -156,7 +174,10 @@ class ConsumerDispatcher:
     def start(self, handle_func, config):
         self.queues = [mp.Queue(maxsize=self.capacity) for _ in range(self.num_workers)]
-        api_precision_csv_file = [ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME, ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME]
+        api_precision_csv_file = [
+            ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME,
+            ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME
+        ]
         common_config = CommonCompareConfig(self.compare, handle_func, config)
         for xpu_id, q in enumerate(self.queues):
             p = mp.Process(name="run_ut_process", target=run_ut_process,
@@ -164,8 +185,10 @@ class ConsumerDispatcher:
             p.start()
             self.processes.append(p)
-        logger.info(f"Api_precision_compare task result will be saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
-        logger.info(f"Api_precision_compare task details will be saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
+        logger.info(
+            f'Api_precision_compare task result will be saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}')
+        logger.info(
+            f"Api_precision_compare task details will be saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
         logger.info("Successfully start unittest process.")
     def stop(self):

msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py ADDED Viewed

@@ -0,0 +1,110 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from functools import wraps
+import torch
+from torch.utils._python_dispatch import TorchDispatchMode
+from msprobe.pytorch.api_accuracy_checker.common.utils import ApiData
+from msprobe.pytorch.common.utils import get_tensor_rank
+from msprobe.core.common.const import Const
+from msprobe.pytorch.common.log import logger
+from msprobe.core.common.file_utils import load_yaml
+def singleton(cls):
+    _instance = {}
+    @wraps(cls)
+    def inner():
+        if cls not in _instance:
+            _instance[cls] = cls()
+        return _instance[cls]
+    return inner
+@singleton
+class Counter:
+    def __init__(self) -> None:
+        self.index_dict = {}
+counter = Counter()
+yaml_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "torch_ops_config.yaml")
+yaml_file = load_yaml(yaml_path)
+class AccuracyCheckerDispatch(TorchDispatchMode):
+    def __init__(self, attl):
+        super(AccuracyCheckerDispatch, self).__init__()
+        self.attl = attl
+        self.counter = counter
+        self.aten_ops_blacklist = []
+        self.npu_adjust_autogard = []
+        self.aten_ops_blacklist = yaml_file.get('aten_ops_blacklist', [])
+        self.npu_adjust_autogard = yaml_file.get('npu_adjust_autogard', [])
+    def __torch_dispatch__(self, func, types, args=None, kwargs=None):
+        func_name_split_list = func.__name__.split(Const.SEP)
+        aten_api = func_name_split_list[0]
+        self.enable_autogard(aten_api)
+        if aten_api in self.aten_ops_blacklist:
+            npu_out = func(*args, **kwargs)
+            return npu_out
+        res = func(*args, **kwargs)
+        cur_rank = get_tensor_rank(args, res)
+        cur_api_number = self.counter.index_dict.setdefault(aten_api, 0)
+        api_name = f'{Const.ATEN}{Const.SEP}{aten_api}{Const.SEP}{cur_api_number}'
+        logger.info(f"tools is dumping api: {api_name}")
+        api_data = ApiData(api_name, args, kwargs, res, 0, cur_rank)
+        if "device" in api_data.kwargs:
+            api_data.kwargs.pop("device")
+        if self.attl.nfs_path:
+            self.attl.upload(api_data)
+        else:
+            self.attl.send(api_data)
+        self.counter.index_dict[aten_api] += 1
+        return res
+    def enable_autogard(self, aten_api):
+        if aten_api in self.npu_adjust_autogard:
+            torch._C._dispatch_tls_set_dispatch_key_excluded(torch._C.DispatchKey.AutogradFunctionality, False)
+def dispatch4data(func, attl, status):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        if not status:
+            return func(*args, **kwargs)
+        with AccuracyCheckerDispatch(attl):
+            res = func(*args, **kwargs)
+            return res
+    return wrapper
+def run_ut_dispatch(attl, status):
+    """
+    This function called by online_run_ut.
+    It is used to enable or disable dispatch for torch.autograd.backward function.
+    Args:
+        attl (ATTL):  online_run_ut class ATTL, which is used to upload or send api data to server.
+        status (bool): True means enable dispatch, False means disable dispatch.
+    """
+    torch.autograd.backward = dispatch4data(torch.autograd.backward, attl, status)

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

mindstudio-probe 1.0.4py3-none-any.whl → 1.1.0py3-none-any.whl