mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
  2. mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
  3. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
  4. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
  5. msprobe/README.md +39 -3
  6. msprobe/config.json +1 -3
  7. msprobe/core/advisor/advisor.py +8 -3
  8. msprobe/core/common/const.py +113 -13
  9. msprobe/core/common/exceptions.py +25 -3
  10. msprobe/core/common/file_utils.py +150 -26
  11. msprobe/core/common/inplace_op_checker.py +15 -0
  12. msprobe/core/common/log.py +27 -9
  13. msprobe/core/common/utils.py +182 -69
  14. msprobe/core/common_config.py +44 -15
  15. msprobe/core/compare/acc_compare.py +207 -142
  16. msprobe/core/compare/check.py +2 -5
  17. msprobe/core/compare/compare_cli.py +21 -4
  18. msprobe/core/compare/highlight.py +124 -55
  19. msprobe/core/compare/layer_mapping/__init__.py +19 -0
  20. msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
  21. msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
  22. msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
  23. msprobe/core/compare/npy_compare.py +52 -23
  24. msprobe/core/compare/utils.py +272 -247
  25. msprobe/core/data_dump/data_collector.py +13 -11
  26. msprobe/core/data_dump/data_processor/base.py +46 -16
  27. msprobe/core/data_dump/data_processor/mindspore_processor.py +4 -4
  28. msprobe/core/data_dump/data_processor/pytorch_processor.py +156 -59
  29. msprobe/core/data_dump/scope.py +113 -34
  30. msprobe/core/grad_probe/constant.py +27 -13
  31. msprobe/core/grad_probe/grad_compare.py +18 -1
  32. msprobe/core/grad_probe/utils.py +30 -2
  33. msprobe/core/overflow_check/abnormal_scene.py +185 -0
  34. msprobe/core/overflow_check/api_info.py +55 -0
  35. msprobe/core/overflow_check/checker.py +138 -0
  36. msprobe/core/overflow_check/filter.py +157 -0
  37. msprobe/core/overflow_check/ignore_rules.yaml +55 -0
  38. msprobe/core/overflow_check/level.py +22 -0
  39. msprobe/core/overflow_check/utils.py +28 -0
  40. msprobe/docs/01.installation.md +10 -0
  41. msprobe/docs/02.config_introduction.md +49 -22
  42. msprobe/docs/03.config_examples.md +2 -9
  43. msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
  44. msprobe/docs/05.data_dump_PyTorch.md +3 -1
  45. msprobe/docs/06.data_dump_MindSpore.md +157 -90
  46. msprobe/docs/07.accuracy_checker_PyTorch.md +12 -12
  47. msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
  48. msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
  49. msprobe/docs/10.accuracy_compare_PyTorch.md +19 -13
  50. msprobe/docs/11.accuracy_compare_MindSpore.md +104 -13
  51. msprobe/docs/12.overflow_check_PyTorch.md +1 -1
  52. msprobe/docs/13.overflow_check_MindSpore.md +6 -6
  53. msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
  54. msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
  55. msprobe/docs/17.grad_probe.md +5 -6
  56. msprobe/docs/19.monitor.md +468 -0
  57. msprobe/docs/20.monitor_performance_baseline.md +52 -0
  58. msprobe/docs/21.visualization_PyTorch.md +386 -0
  59. msprobe/docs/22.visualization_MindSpore.md +384 -0
  60. msprobe/docs/23.tool_function_introduction.md +28 -0
  61. msprobe/docs/FAQ.md +3 -0
  62. msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
  63. msprobe/docs/img/compare_result.png +0 -0
  64. msprobe/docs/img/monitor/cpu_info.png +0 -0
  65. msprobe/mindspore/__init__.py +15 -0
  66. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +113 -145
  67. msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
  68. msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
  69. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
  70. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
  71. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
  72. msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
  73. msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
  74. msprobe/mindspore/api_accuracy_checker/main.py +27 -3
  75. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
  76. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
  77. msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
  78. msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
  79. msprobe/mindspore/cell_processor.py +33 -12
  80. msprobe/mindspore/common/const.py +33 -13
  81. msprobe/mindspore/common/log.py +5 -9
  82. msprobe/mindspore/common/utils.py +43 -4
  83. msprobe/mindspore/compare/distributed_compare.py +22 -22
  84. msprobe/mindspore/compare/ms_compare.py +271 -248
  85. msprobe/mindspore/compare/ms_graph_compare.py +81 -47
  86. msprobe/mindspore/debugger/debugger_config.py +4 -1
  87. msprobe/mindspore/debugger/precision_debugger.py +7 -1
  88. msprobe/mindspore/dump/dump_tool_factory.py +3 -1
  89. msprobe/mindspore/dump/hook_cell/api_registry.py +12 -2
  90. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +13 -16
  91. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +25 -0
  92. msprobe/mindspore/dump/jit_dump.py +17 -5
  93. msprobe/mindspore/dump/kernel_graph_dump.py +2 -4
  94. msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
  95. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
  96. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
  97. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +145 -39
  98. msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
  99. msprobe/mindspore/free_benchmark/common/utils.py +19 -4
  100. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
  101. msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
  102. msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
  103. msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
  104. msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
  105. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
  106. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
  107. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +4 -4
  108. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
  109. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
  110. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
  111. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
  112. msprobe/mindspore/grad_probe/global_context.py +28 -8
  113. msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
  114. msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
  115. msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
  116. msprobe/mindspore/grad_probe/hook.py +24 -10
  117. msprobe/mindspore/grad_probe/utils.py +18 -5
  118. msprobe/mindspore/ms_config.py +22 -15
  119. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +2 -4
  120. msprobe/mindspore/runtime.py +15 -0
  121. msprobe/mindspore/service.py +36 -30
  122. msprobe/mindspore/task_handler_factory.py +15 -0
  123. msprobe/msprobe.py +24 -7
  124. msprobe/pytorch/__init__.py +3 -2
  125. msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
  126. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -4
  127. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
  128. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
  129. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
  130. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +6 -1
  131. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +19 -14
  132. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +13 -9
  133. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +77 -53
  134. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +15 -4
  135. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
  136. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
  137. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
  138. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
  139. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
  140. msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
  141. msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
  142. msprobe/pytorch/bench_functions/npu_fusion_attention.py +100 -6
  143. msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
  144. msprobe/pytorch/bench_functions/swiglu.py +10 -2
  145. msprobe/pytorch/common/parse_json.py +6 -6
  146. msprobe/pytorch/common/utils.py +56 -5
  147. msprobe/pytorch/compare/distributed_compare.py +8 -9
  148. msprobe/pytorch/compare/pt_compare.py +8 -6
  149. msprobe/pytorch/debugger/debugger_config.py +19 -15
  150. msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
  151. msprobe/pytorch/free_benchmark/common/constant.py +15 -0
  152. msprobe/pytorch/free_benchmark/common/counter.py +15 -0
  153. msprobe/pytorch/free_benchmark/common/enums.py +15 -0
  154. msprobe/pytorch/free_benchmark/common/params.py +8 -1
  155. msprobe/pytorch/free_benchmark/common/utils.py +26 -4
  156. msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -3
  157. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
  158. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
  159. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
  160. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
  161. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
  162. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +10 -0
  163. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
  164. msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
  165. msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
  166. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
  167. msprobe/pytorch/hook_module/wrap_functional.py +14 -12
  168. msprobe/pytorch/module_processer.py +2 -5
  169. msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
  170. msprobe/pytorch/monitor/anomaly_detect.py +340 -0
  171. msprobe/pytorch/monitor/distributed/__init__.py +0 -0
  172. msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
  173. msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
  174. msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
  175. msprobe/pytorch/monitor/features.py +108 -0
  176. msprobe/pytorch/monitor/module_hook.py +870 -0
  177. msprobe/pytorch/monitor/module_metric.py +193 -0
  178. msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
  179. msprobe/pytorch/monitor/optimizer_collect.py +295 -0
  180. msprobe/pytorch/monitor/unittest/__init__.py +0 -0
  181. msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
  182. msprobe/pytorch/monitor/utils.py +250 -0
  183. msprobe/pytorch/monitor/visualizer.py +59 -0
  184. msprobe/pytorch/online_dispatch/__init__.py +2 -3
  185. msprobe/pytorch/online_dispatch/compare.py +29 -38
  186. msprobe/pytorch/online_dispatch/dispatch.py +50 -25
  187. msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
  188. msprobe/pytorch/online_dispatch/single_compare.py +53 -32
  189. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
  190. msprobe/pytorch/online_dispatch/utils.py +49 -21
  191. msprobe/pytorch/parse_tool/lib/compare.py +12 -18
  192. msprobe/pytorch/parse_tool/lib/config.py +1 -1
  193. msprobe/pytorch/parse_tool/lib/parse_tool.py +1 -2
  194. msprobe/pytorch/parse_tool/lib/utils.py +16 -35
  195. msprobe/pytorch/parse_tool/lib/visualization.py +2 -0
  196. msprobe/pytorch/pt_config.py +31 -8
  197. msprobe/pytorch/service.py +15 -5
  198. msprobe/visualization/__init__.py +14 -0
  199. msprobe/visualization/builder/__init__.py +14 -0
  200. msprobe/visualization/builder/graph_builder.py +165 -0
  201. msprobe/visualization/builder/msprobe_adapter.py +205 -0
  202. msprobe/visualization/compare/__init__.py +14 -0
  203. msprobe/visualization/compare/graph_comparator.py +130 -0
  204. msprobe/visualization/compare/mode_adapter.py +211 -0
  205. msprobe/visualization/graph/__init__.py +14 -0
  206. msprobe/visualization/graph/base_node.py +124 -0
  207. msprobe/visualization/graph/graph.py +200 -0
  208. msprobe/visualization/graph/node_colors.py +95 -0
  209. msprobe/visualization/graph/node_op.py +39 -0
  210. msprobe/visualization/graph_service.py +214 -0
  211. msprobe/visualization/utils.py +232 -0
  212. mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
  213. msprobe/docs/04.acl_config_examples.md +0 -78
  214. msprobe/mindspore/compare/layer_mapping.py +0 -146
  215. msprobe/mindspore/compare/modify_mapping.py +0 -107
  216. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
  217. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
  218. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
  219. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
  220. /msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py
@@ -0,0 +1,206 @@
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Standard library imports
+ import multiprocessing
+ from multiprocessing import Manager
+ import os
+ import signal
+ import sys
+ import time
+
+ # Third-party imports
+ from mindspore import context
+ import numpy as np
+ from tqdm import tqdm
+
+ # Local application/library-specific imports
+ from msprobe.core.common.const import Const, CompareConst, MsCompareConst
+ from msprobe.mindspore.api_accuracy_checker.api_accuracy_checker import ApiAccuracyChecker, BasicInfoAndStatus
+ from msprobe.mindspore.api_accuracy_checker.multi_data_manager import MultiDataManager
+ from msprobe.mindspore.common.log import logger
+
+
+ class MultiApiAccuracyChecker(ApiAccuracyChecker):
+     def __init__(self, args):
+         # Attributes or methods specific to MultiApiAccuracyChecker can be added here
+         self.api_infos = dict()
+
+         # Use a Manager to create a shared variable so the processes stay in sync
+         self.manager = Manager()
+         self.is_first_write = self.manager.Value('b', True)  # create the shared variable
+
+         # Pass the shared is_first_write when initializing the DataManager
+         self.multi_data_manager = MultiDataManager(args.out_path, args.result_csv_path, self.is_first_write)
+
+         self.args = args  # keep args as an instance attribute
+
+         # Attribute holding the current device ID (used in log messages)
+         self.current_device_id = None
+
+     def process_on_device(self, device_id, api_infos, progress_queue):
+         """
+         Process a subset of APIs on a specific device.
+
+         Parameters:
+             device_id (int): ID of the device to use.
+             api_infos (list): list of (api_name, api_info) tuples.
+             progress_queue (multiprocessing.Queue): queue used to report progress updates.
+         """
+
+         # Record the current device ID
+         self.current_device_id = device_id
+
+         # Set the device_id in the MindSpore context
+         context.set_context(device_id=device_id)
+
+         # Iterate over the tasks assigned to this process
+         for _, (api_name_str, api_info) in enumerate(api_infos):
+             logger.debug(f"Processing API: {api_name_str}, Device: {device_id}")
+
+             if not self.multi_data_manager.is_unique_api(api_name_str):
+                 logger.debug(f"API {api_name_str} is not unique, skipping.")
+                 progress_queue.put(1)
+                 continue
+
+             # Forward pass
+             forward_output_list = self.process_forward(api_name_str, api_info)
+             if forward_output_list is not Const.EXCEPTION_NONE:
+                 self.multi_data_manager.record(forward_output_list)
+
+             # Backward pass
+             backward_output_list = self.process_backward(api_name_str, api_info)
+             if backward_output_list is not Const.EXCEPTION_NONE:
+                 self.multi_data_manager.record(backward_output_list)
+
+             # Save the results
+             self.multi_data_manager.save_results(api_name_str)
+             progress_queue.put(1)  # report progress
+
+     def run_and_compare(self):
+         # List of device IDs to use
+         device_ids = self.args.device_id
+
+         # API entries to be split across the devices
+         partitioned_api_infos = list(self.api_infos.items())
+
+         # Interleave the tasks in the main process (modulo-based assignment)
+         partitioned_api_infos_split = [[] for _ in range(len(device_ids))]
+         for idx, api_info in enumerate(partitioned_api_infos):
+             device_index = idx % len(device_ids)  # assign tasks by index modulo
+             partitioned_api_infos_split[device_index].append(api_info)
+
+         # Create a shared progress queue
+         progress_queue = multiprocessing.Queue()
+
+         # Progress bar
+         total_tasks = len(partitioned_api_infos)  # total number of tasks
+         with tqdm(total=total_tasks, desc="Total Progress", ncols=100) as pbar:
+             # Spawn the worker processes
+             processes = []
+             for index, device_id in enumerate(device_ids):
+                 process = multiprocessing.Process(target=self.process_on_device,
+                                                   args=(device_id, partitioned_api_infos_split[index], progress_queue))
+                 processes.append(process)
+                 process.start()
+
+             # The main process updates the progress bar
+             completed_tasks = 0
+             while completed_tasks < total_tasks:
+                 try:
+                     completed_tasks += progress_queue.get(timeout=Const.PROGRESS_TIMEOUT)  # timeout in seconds
+                     pbar.update(1)
+                 except multiprocessing.queues.Empty:
+                     logger.error("Timeout while waiting for progress updates. Skipping remaining tasks.")
+                     break
+
+                 # Check the state of the child processes
+                 for process in processes:
+                     if not process.is_alive():
+                         if process.exitcode != 0:
+                             logger.error(f"Process {process.pid} exited with code {process.exitcode}.")
+                             total_tasks -= len(partitioned_api_infos_split[processes.index(process)])
+                         processes.remove(process)
+
+             # Make sure every child process has finished or is terminated
+             for process in processes:
+                 process.join(timeout=Const.PROGRESS_TIMEOUT)
+                 if process.is_alive():
+                     logger.error(f"Process {process.pid} did not terminate. Forcing termination.")
+                     process.terminate()
+
+     def process_forward(self, api_name_str, api_info):
+         """
+         Overrides the parent class's process_forward method to log the device ID when exceptions occur.
+
+         Parameters:
+             api_name_str (str): The name of the API.
+             api_info (object): The API information object.
+
+         Returns:
+             list or None: The forward output list or None if an error occurs.
+         """
+         if not api_info.check_forward_info():
+             logger.debug(
+                 f"[Device {self.current_device_id}] API: {api_name_str} lacks forward information, skipping forward check.")
+             return Const.EXCEPTION_NONE
+
+         try:
+             forward_inputs_aggregation = self.prepare_api_input_aggregation(api_info, Const.FORWARD)
+         except Exception as e:
+             logger.warning(
+                 f"[Device {self.current_device_id}] Exception occurred while getting forward API inputs for {api_name_str}. Skipping forward check. Detailed exception information: {e}.")
+             return Const.EXCEPTION_NONE
+
+         forward_output_list = None
+         try:
+             forward_output_list = self.run_and_compare_helper(api_info, api_name_str, forward_inputs_aggregation,
+                                                               Const.FORWARD)
+         except Exception as e:
+             logger.warning(
+                 f"[Device {self.current_device_id}] Exception occurred while running and comparing {api_name_str} forward API. Detailed exception information: {e}.")
+         return forward_output_list
+
+     def process_backward(self, api_name_str, api_info):
+         """
+         Overrides the parent class's process_backward method to log the device ID when exceptions occur.
+
+         Parameters:
+             api_name_str (str): The name of the API.
+             api_info (object): The API information object.
+
+         Returns:
+             list or None: The backward output list or None if an error occurs.
+         """
+         if not api_info.check_backward_info():
+             logger.debug(
+                 f"[Device {self.current_device_id}] API: {api_name_str} lacks backward information, skipping backward check.")
+             return Const.EXCEPTION_NONE
+
+         try:
+             backward_inputs_aggregation = self.prepare_api_input_aggregation(api_info, Const.BACKWARD)
+         except Exception as e:
+             logger.warning(
+                 f"[Device {self.current_device_id}] Exception occurred while getting backward API inputs for {api_name_str}. Skipping backward check. Detailed exception information: {e}.")
+             return Const.EXCEPTION_NONE
+
+         backward_output_list = None
+         try:
+             backward_output_list = self.run_and_compare_helper(api_info, api_name_str, backward_inputs_aggregation,
+                                                                Const.BACKWARD)
+         except Exception as e:
+             logger.warning(
+                 f"[Device {self.current_device_id}] Exception occurred while running and comparing {api_name_str} backward API. Detailed exception information: {e}.")
+         return backward_output_list
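The new MultiApiAccuracyChecker splits the dumped APIs across several MindSpore devices by index modulo and funnels all results through a shared MultiDataManager. A minimal, hypothetical driver sketch (not part of the released package): the Namespace fields out_path, result_csv_path and device_id are the ones __init__ and run_and_compare read above; the concrete values and the way api_infos gets populated are assumptions.

```python
# Hypothetical usage sketch of the new multi-device checker; attribute names come
# from the diff above, the concrete values are illustrative only.
from argparse import Namespace

from msprobe.mindspore.api_accuracy_checker.multi_api_accuracy_checker import MultiApiAccuracyChecker

args = Namespace(
    out_path="./api_checker_output",  # directory receiving the detail/result CSVs
    result_csv_path="",               # existing result CSV to resume from, otherwise empty
    device_id=[0, 1],                 # APIs are interleaved across these devices (idx % len(device_id))
)

checker = MultiApiAccuracyChecker(args)
# In the real tool, checker.api_infos is presumably filled from the dumped API
# information (via the parent ApiAccuracyChecker) before the comparison starts.
checker.run_and_compare()
```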
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py
@@ -0,0 +1,58 @@
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ import multiprocessing
+ import os
+
+ from msprobe.mindspore.api_accuracy_checker.data_manager import DataManager, ResultCsvEntry, write_csv_header, get_result_csv_header, get_detail_csv_header, check_csv_header
+ from msprobe.mindspore.common.log import logger
+
+
+ class MultiDataManager(DataManager):
+     def __init__(self, csv_dir, result_csv_path, shared_is_first_write):
+         super().__init__(csv_dir, result_csv_path)
+
+         # Use the shared is_first_write variable to control header writing
+         self.shared_is_first_write = shared_is_first_write
+         # Create a lock object to keep the operation safe across processes
+         self.lock = multiprocessing.Lock()
+
+     def save_results(self, api_name_str):
+         """Save the results; a thread-safe operation."""
+
+         with self.lock:  # make sure saving is never performed by several processes at once
+             if self.is_first_write and self.shared_is_first_write.value:
+                 self.shared_is_first_write.value = False
+                 self.is_first_write = False  # mark as False after writing to avoid duplicating the header
+                 # Write the headers directly
+                 logger.info("Writing CSV headers for the first time.")
+                 write_csv_header(self.detail_out_path, get_detail_csv_header)
+                 write_csv_header(self.result_out_path, get_result_csv_header)
+
+             """Write the detailed output and the result summary, then clear the results."""
+             self.to_detail_csv(self.detail_out_path)
+             logger.debug(f"Detailed output for {api_name_str} written to {self.detail_out_path}.")
+
+             self.to_result_csv(self.result_out_path)
+             logger.debug(f"Result summary for {api_name_str} written to {self.result_out_path}.")
+
+             # Clear the records to get ready for the next call
+             self.clear_results()
+
+     def clear_results(self):
+         """Clear the self.results data; a thread-safe operation."""
+         logger.debug("Clearing results data.")
+         self.results.clear()
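MultiDataManager coordinates several worker processes writing to the same CSV files: a Manager().Value('b', True) flag decides who writes the header and a multiprocessing.Lock serializes the writes. A standalone sketch of that synchronization pattern (plain csv instead of the msprobe writers; file name and row layout are illustrative only):

```python
# Standalone illustration (not from the package) of the shared-flag-plus-lock
# pattern MultiDataManager relies on: only the first writer emits the CSV header.
import csv
import multiprocessing
from multiprocessing import Manager


def save_rows(path, rows, is_first_write, lock):
    with lock:  # serialize file access across workers
        with open(path, "a", newline="") as f:
            writer = csv.writer(f)
            if is_first_write.value:  # only the first writer emits the header
                writer.writerow(["api_name", "status"])
                is_first_write.value = False
            writer.writerows(rows)


if __name__ == "__main__":
    manager = Manager()
    first = manager.Value('b', True)
    lock = multiprocessing.Lock()
    workers = [
        multiprocessing.Process(target=save_rows,
                                args=("result.csv", [[f"api_{i}", "pass"]], first, lock))
        for i in range(4)
    ]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
```

Because both the flag check and the header write happen under the lock, exactly one worker writes the header regardless of start order.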
msprobe/mindspore/api_accuracy_checker/type_mapping.py
@@ -1,7 +1,23 @@
- from mindspore.common import dtype as mstype
- import numpy as np
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import mindspore
+ import numpy as np
  import torch
+ from mindspore._c_expression import typing
+ from mindspore.common import dtype as mstype

  INT8 = "Int8"
  UINT8 = "UInt8"
@@ -18,7 +34,6 @@ BOOL = "Bool"
  BFLOAT16 = "BFloat16"
  INT4 = "Int4"

-
  dtype_str_to_ms_dtype = {
      INT8: mstype.int8,
      UINT8: mstype.uint8,
@@ -37,7 +52,6 @@ dtype_str_to_ms_dtype = {
  }
  ms_dtype_to_dtype_str = {value: key for key, value in dtype_str_to_ms_dtype.items()}

-
  dtype_str_to_np_dtype = {
      INT8: np.int8,
      UINT8: np.uint8,
@@ -75,6 +89,8 @@ FLOAT_TYPE_STR = "float"
  SLICE_TYPE_STR = "slice"
  TUPLE_TYPE_STR = "tuple"
  STR_TYPE_STR = "str"
+ MINDSPORE_DTYPE_TYPE_STR = "mindspore.dtype"
+ TORCH_DTYPE_TYPE_STR = "torch.dtype"

  api_info_type_str_to_type = {
      MINDSPORE_TENSOR_TYPE_STR: mindspore.Tensor,
@@ -83,6 +99,7 @@ api_info_type_str_to_type = {
      FLOAT_TYPE_STR: float,
      SLICE_TYPE_STR: slice,
      STR_TYPE_STR: str,
+     MINDSPORE_DTYPE_TYPE_STR: typing.Type,
  }
  type_to_api_info_type_str = {value: key for key, value in api_info_type_str_to_type.items()}

@@ -111,4 +128,4 @@ uint_dtype_str_list = [
      UINT16,
      UINT32,
      UINT64,
- ]
+ ]
msprobe/mindspore/api_accuracy_checker/utils.py
@@ -1,8 +1,24 @@
- from msprobe.core.common.exceptions import ApiAccuracyCheckerException
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  from msprobe.core.common.const import Const
+ from msprobe.core.common.exceptions import ApiAccuracyCheckerException
  from msprobe.mindspore.api_accuracy_checker.type_mapping import float_dtype_str_list
  from msprobe.mindspore.common.log import logger

+
  def check_and_get_from_json_dict(dict_instance, key, key_description, accepted_type=None, accepted_value=None):
      '''
      Args:
@@ -22,30 +38,30 @@ def check_and_get_from_json_dict(dict_instance, key, key_description, accepted_t
          3. value is not accepted type
          4. value is not accepted value
      '''
-     parse_failed_exception = ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed)
      if not isinstance(dict_instance, dict):
-         logger.error_log_with_exp("check_and_get_from_json_dict failed: input is not a dict", parse_failed_exception)
+         error_info = "check_and_get_from_json_dict failed: input is not a dict"
+         raise ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed, error_info)
      value = dict_instance.get(key)
      if value is None:
-         logger.error_log_with_exp(f"check_and_get_from_json_dict failed: {key_description} is missing",
-                                   parse_failed_exception)
+         error_info = f"check_and_get_from_json_dict failed: {key_description} is missing"
+         raise ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed, error_info)
      elif accepted_type is not None and not isinstance(value, accepted_type):
-         logger.error_log_with_exp(
-             f"check_and_get_from_json_dict failed: {key_description} is not accepted type: {accepted_type}",
-             parse_failed_exception)
+         error_info = f"check_and_get_from_json_dict failed: {key_description} is not accepted type: {accepted_type}"
+         raise ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed, error_info)
      elif accepted_value is not None and value not in accepted_value:
-         logger.error_log_with_exp(
-             f"check_and_get_from_json_dict failed: {key_description} is not accepted value: {accepted_value}",
-             parse_failed_exception)
+         error_info = f"check_and_get_from_json_dict failed: {key_description} is not accepted value: {accepted_value}"
+         raise ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed, error_info)
      return value

- def convert_to_tuple(input):
-     if isinstance(input, (tuple, list)):
-         return tuple(input)
+
+ def convert_to_tuple(args):
+     if isinstance(args, (tuple, list)):
+         return tuple(args)
      else:
-         input_list = [input]
+         input_list = [args]
          return tuple(input_list)

+
  def trim_output_compute_element_list(compute_element_list, forward_or_backward):
      '''
      Args:
@@ -55,12 +71,13 @@ def trim_output_compute_element_list(compute_element_list, forward_or_backward):
      trimmed_list = []
      for compute_element in compute_element_list:
          if compute_element.get_parameter() is None or \
-             (forward_or_backward == Const.BACKWARD and compute_element.get_dtype() not in float_dtype_str_list):
+                 (forward_or_backward == Const.BACKWARD and compute_element.get_dtype() not in float_dtype_str_list):
              # trim case: 1. parameter is None. 2. backward output has non float parameter
              continue
          trimmed_list.append(compute_element)
      return trimmed_list

+
  class GlobalContext:
      def __init__(self):
          self.is_constructed = True
@@ -77,4 +94,4 @@ class GlobalContext:
          return self.is_constructed


- global_context = GlobalContext()
+ global_context = GlobalContext()
msprobe/mindspore/cell_processor.py
@@ -1,4 +1,19 @@
- from msprobe.core.data_dump.scope import ModuleRangeScope
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from msprobe.core.data_dump.scope import ModuleRangeScope, MixRangeScope
  from msprobe.core.common.const import Const


@@ -9,10 +24,7 @@ class CellProcessor:
      module_node = {}

      def __init__(self, scope):
-         if isinstance(scope, ModuleRangeScope):
-             self.scope = scope
-         else:
-             self.scope = None
+         self.scope = scope if isinstance(scope, (ModuleRangeScope, MixRangeScope)) else None

      @staticmethod
      def set_cell_count(cell_name):
@@ -21,30 +33,29 @@ class CellProcessor:
          else:
              CellProcessor.cell_count[cell_name] += 1
          return CellProcessor.cell_count[cell_name]
-
+
      @classmethod
      def reset_cell_stats(cls):
          cls.cell_count = {}
          cls.cell_stack = []
          cls.api_parent_node = ""
          cls.module_node = {}
-
+
      def node_hook(self, name_prefix, start_or_stop, **kwargs):
-         def begin_hook(cell, input):
-             index = self.set_cell_count(name_prefix)
-             cell.mindstudio_reserved_name = full_name = name_prefix + Const.SEP + str(index)
+         def begin_hook(cell, input_data):
+             full_name = self.set_and_get_reserved_name(cell, name_prefix, is_called_by_pre_hook=True)
              if CellProcessor.cell_stack:
                  CellProcessor.module_node[full_name] = CellProcessor.cell_stack[-1]
              else:
                  CellProcessor.module_node[full_name] = None
-
+
              CellProcessor.cell_stack.append(full_name)
              CellProcessor.api_parent_node = full_name

              if self.scope:
                  self.scope.begin_module(full_name)

-         def end_hook(cell, input, output):
+         def end_hook(cell, input_data, output_data):
              if CellProcessor.cell_stack:
                  CellProcessor.cell_stack.pop()
              if CellProcessor.cell_stack:
@@ -56,3 +67,13 @@ class CellProcessor:
                  self.scope.end_module(cell.mindstudio_reserved_name)

          return begin_hook if Const.START == start_or_stop else end_hook
+
+     def set_and_get_reserved_name(self, cell, cell_name, is_called_by_pre_hook=False):
+         if not is_called_by_pre_hook and hasattr(cell, 'has_pre_hook_called') and cell.has_pre_hook_called:
+             cell.has_pre_hook_called = False
+         else:
+             if is_called_by_pre_hook:
+                 cell.has_pre_hook_called = True
+             index = self.set_cell_count(cell_name)
+             cell.mindstudio_reserved_name = cell_name + Const.SEP + str(index)
+         return cell.mindstudio_reserved_name
msprobe/mindspore/common/const.py
@@ -1,3 +1,18 @@
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import numpy as np
  import mindspore as ms

@@ -23,17 +38,10 @@ class Const:
      ASCEND_910A = "ascend910"

      OPS_PREFIX = "mindspore.ops."
-     Tensor_PREFIX = "mindspore.Tensor."
+     TENSOR_PREFIX = "mindspore.Tensor."
      MINT_PREFIX = "mindspore.mint."
      MINT_NN_FUNC_PREFIX = "mindspore.mint.nn.functional."
-     COMM_PREFIX = "mindspore.communication.comm_func."
-     COMMUNICATION_API_LIST = [
-         "mindspore.communication.comm_func.all_gather_into_tensor",
-         "mindspore.communication.comm_func.gather_into_tensor",
-         "mindspore.communication.comm_func.all_reduce",
-         "mindspore.communication.comm_func.reduce",
-         "mindspore.communication.comm_func.reduce_scatter_tensor"
-     ]
+
      TENSOR_DATA_PREFIX = "Tensor."
      STUB_TENSOR_DATA_PREFIX = "Tensor."
      OPS_DATA_PREFIX = "Functional."
@@ -50,6 +58,15 @@

      DROPOUT_API_NAME_PREFIX = "dropout"

+     GRAPH_DATA_MODE_LIST = [CoreConst.ALL, CoreConst.INPUT, CoreConst.OUTPUT]
+
+     HOOK_MS_PREFIX_DICT = {
+         OPS_DATA_PREFIX: OPS_PREFIX,
+         TENSOR_DATA_PREFIX: TENSOR_PREFIX,
+         MINT_DATA_PREFIX: MINT_PREFIX,
+         MINT_NN_FUNC_DATA_PREFIX: MINT_NN_FUNC_PREFIX
+     }
+

  class FreeBenchmarkConst:
      ADD_NOISE = "add_noise"
@@ -65,19 +82,21 @@ class FreeBenchmarkConst:
      DEFAULT_PERT_TYPE = IMPROVE_PRECISION
      DEFAULT_HANDLER_TYPE = CHECK
      DEVICE_LIST = [DEFAULT_DEVICE]
-     STAGE_LIST = [CoreConst.FORWARD]
+     STAGE_LIST = [CoreConst.FORWARD, CoreConst.BACKWARD]
      DUMP_LEVEL_LIST = [DEFAULT_DUMP_LEVEL]
      PERT_TYPE_LIST = [IMPROVE_PRECISION, ADD_NOISE, BIT_NOISE, NO_CHANGE, EXCHANGE_VALUE]
      HANDLER_TYPE_LIST = [CHECK, FIX]
      NO_CHANGE_ERROR_THRESHOLD = 1.0
      SYMBOL_FLIPPING_RATIO = 8.0

+     SUPPORTED_CHECK_API_FILE = "support_wrap_ops.yaml"
+     CHECK_RESULT_FILE = "free_benchmark.csv"
+
      API_PREFIX_DICT = {
          "ops": Const.OPS_PREFIX,
-         "Tensor": Const.Tensor_PREFIX,
+         "Tensor": Const.TENSOR_PREFIX,
          "mint": Const.MINT_PREFIX,
-         "mint.nn.functional": Const.MINT_NN_FUNC_PREFIX,
-         "communication": Const.COMM_PREFIX
+         "mint.nn.functional": Const.MINT_NN_FUNC_PREFIX
      }

      PERT_VALUE_DICT = {
@@ -88,6 +107,7 @@
      }

      ERROR_THRESHOLD = {
+         ms.bfloat16: 1.004,
          ms.float16: 1.002,
          ms.float32: 1.0002
      }
msprobe/mindspore/common/log.py
@@ -1,4 +1,5 @@
- # Copyright 2024 Huawei Technologies Co., Ltd
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -11,15 +12,10 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- # ============================================================================

- import os
- import time
- import sys
-
- from msprobe.mindspore.common.utils import get_rank_if_initialized
- from msprobe.core.common.log import BaseLogger
  from msprobe.core.common.exceptions import DistributedNotInitializedError
+ from msprobe.core.common.log import BaseLogger
+ from msprobe.mindspore.common.utils import get_rank_if_initialized


  class MindsporeLogger(BaseLogger):
@@ -35,4 +31,4 @@ class MindsporeLogger(BaseLogger):
          return current_rank


- logger = MindsporeLogger()
+ logger = MindsporeLogger()