mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/METADATA +3 -3
  2. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/RECORD +168 -150
  3. msprobe/README.md +27 -22
  4. msprobe/core/common/const.py +129 -60
  5. msprobe/core/common/decorator.py +50 -0
  6. msprobe/core/common/exceptions.py +3 -1
  7. msprobe/core/common/file_utils.py +25 -2
  8. msprobe/core/common/inplace_ops.yaml +1 -0
  9. msprobe/core/common/utils.py +43 -33
  10. msprobe/core/compare/acc_compare.py +43 -74
  11. msprobe/core/compare/check.py +2 -6
  12. msprobe/core/compare/highlight.py +2 -0
  13. msprobe/core/compare/layer_mapping/data_scope_parser.py +1 -1
  14. msprobe/core/compare/layer_mapping/layer_mapping.py +2 -1
  15. msprobe/core/compare/merge_result/merge_result.py +16 -9
  16. msprobe/core/compare/merge_result/utils.py +81 -0
  17. msprobe/core/compare/multiprocessing_compute.py +19 -12
  18. msprobe/core/compare/npy_compare.py +30 -12
  19. msprobe/core/compare/utils.py +30 -10
  20. msprobe/core/data_dump/api_registry.py +176 -0
  21. msprobe/core/data_dump/data_collector.py +58 -13
  22. msprobe/core/data_dump/data_processor/base.py +94 -10
  23. msprobe/core/data_dump/data_processor/factory.py +3 -0
  24. msprobe/core/data_dump/data_processor/mindspore_processor.py +33 -33
  25. msprobe/core/data_dump/data_processor/pytorch_processor.py +99 -18
  26. msprobe/core/data_dump/json_writer.py +61 -40
  27. msprobe/core/grad_probe/constant.py +1 -0
  28. msprobe/core/grad_probe/grad_compare.py +1 -1
  29. msprobe/core/overflow_check/abnormal_scene.py +2 -0
  30. msprobe/docs/01.installation.md +27 -1
  31. msprobe/docs/02.config_introduction.md +27 -23
  32. msprobe/docs/03.config_examples.md +24 -0
  33. msprobe/docs/05.data_dump_PyTorch.md +103 -16
  34. msprobe/docs/06.data_dump_MindSpore.md +76 -32
  35. msprobe/docs/07.accuracy_checker_PyTorch.md +11 -1
  36. msprobe/docs/08.accuracy_checker_online_PyTorch.md +3 -1
  37. msprobe/docs/09.accuracy_checker_MindSpore.md +5 -3
  38. msprobe/docs/10.accuracy_compare_PyTorch.md +59 -33
  39. msprobe/docs/11.accuracy_compare_MindSpore.md +40 -16
  40. msprobe/docs/12.overflow_check_PyTorch.md +3 -1
  41. msprobe/docs/13.overflow_check_MindSpore.md +4 -2
  42. msprobe/docs/14.data_parse_PyTorch.md +1 -7
  43. msprobe/docs/18.online_dispatch.md +1 -1
  44. msprobe/docs/19.monitor.md +332 -273
  45. msprobe/docs/21.visualization_PyTorch.md +42 -13
  46. msprobe/docs/22.visualization_MindSpore.md +43 -13
  47. msprobe/docs/23.generate_operator_PyTorch.md +9 -9
  48. msprobe/docs/27.dump_json_instruction.md +301 -27
  49. msprobe/docs/28.debugger_save_instruction.md +94 -0
  50. msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
  51. msprobe/docs/29.data_dump_MSAdapter.md +229 -0
  52. msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
  53. msprobe/docs/FAQ.md +3 -11
  54. msprobe/docs/img/compare_result.png +0 -0
  55. msprobe/docs/img/merge_result.png +0 -0
  56. msprobe/docs/img/monitor/step_count_per_record.png +0 -0
  57. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  58. msprobe/docs/img/visualization/vis_match_info.png +0 -0
  59. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  60. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  61. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  62. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  63. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  64. msprobe/mindspore/__init__.py +4 -2
  65. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +32 -7
  66. msprobe/mindspore/api_accuracy_checker/api_runner.py +70 -22
  67. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
  68. msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +602 -0
  69. msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
  70. msprobe/mindspore/api_accuracy_checker/compute_element.py +47 -1
  71. msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -1
  72. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +2 -1
  73. msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +130 -0
  74. msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
  75. msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
  76. msprobe/mindspore/common/const.py +61 -0
  77. msprobe/mindspore/common/utils.py +48 -18
  78. msprobe/mindspore/compare/ms_compare.py +27 -19
  79. msprobe/mindspore/compare/ms_graph_compare.py +6 -5
  80. msprobe/mindspore/debugger/debugger_config.py +31 -6
  81. msprobe/mindspore/debugger/precision_debugger.py +45 -14
  82. msprobe/mindspore/dump/dump_tool_factory.py +5 -3
  83. msprobe/mindspore/dump/hook_cell/api_register.py +142 -0
  84. msprobe/mindspore/dump/hook_cell/hook_cell.py +9 -10
  85. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +24 -26
  86. msprobe/mindspore/dump/jit_dump.py +21 -15
  87. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +22 -56
  88. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -1
  89. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +10 -6
  90. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
  91. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
  92. msprobe/mindspore/grad_probe/global_context.py +2 -0
  93. msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
  94. msprobe/mindspore/grad_probe/hook.py +2 -4
  95. msprobe/mindspore/monitor/anomaly_detect.py +404 -0
  96. msprobe/mindspore/monitor/distributed/__init__.py +0 -0
  97. msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
  98. msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
  99. msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
  100. msprobe/mindspore/monitor/features.py +63 -0
  101. msprobe/mindspore/monitor/module_hook.py +873 -0
  102. msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
  103. msprobe/mindspore/monitor/utils.py +309 -0
  104. msprobe/mindspore/ms_config.py +8 -2
  105. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
  106. msprobe/mindspore/service.py +114 -34
  107. msprobe/pytorch/__init__.py +0 -1
  108. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
  109. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +12 -7
  110. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +2 -2
  111. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +4 -5
  112. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +5 -5
  113. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +25 -6
  114. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -19
  115. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
  116. msprobe/pytorch/bench_functions/apply_adam.py +215 -0
  117. msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
  118. msprobe/pytorch/{parse.py → bench_functions/mish.py} +6 -4
  119. msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +50 -0
  120. msprobe/pytorch/bench_functions/sort_v2.py +21 -0
  121. msprobe/pytorch/common/utils.py +97 -4
  122. msprobe/pytorch/debugger/debugger_config.py +19 -9
  123. msprobe/pytorch/debugger/precision_debugger.py +24 -1
  124. msprobe/pytorch/dump/module_dump/module_dump.py +4 -3
  125. msprobe/pytorch/dump/module_dump/module_processer.py +21 -35
  126. msprobe/pytorch/free_benchmark/common/utils.py +1 -1
  127. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
  128. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
  129. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
  130. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
  131. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
  132. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
  133. msprobe/pytorch/function_factory.py +8 -2
  134. msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
  135. msprobe/pytorch/hook_module/api_register.py +131 -0
  136. msprobe/pytorch/hook_module/hook_module.py +19 -14
  137. msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
  138. msprobe/pytorch/hook_module/support_wrap_ops.yaml +173 -75
  139. msprobe/pytorch/monitor/anomaly_detect.py +14 -29
  140. msprobe/pytorch/monitor/csv2tb.py +18 -14
  141. msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
  142. msprobe/pytorch/monitor/module_hook.py +238 -193
  143. msprobe/pytorch/monitor/module_metric.py +9 -6
  144. msprobe/pytorch/monitor/optimizer_collect.py +100 -67
  145. msprobe/pytorch/monitor/unittest/test_monitor.py +1 -1
  146. msprobe/pytorch/monitor/utils.py +76 -44
  147. msprobe/pytorch/online_dispatch/compare.py +0 -2
  148. msprobe/pytorch/online_dispatch/dispatch.py +9 -0
  149. msprobe/pytorch/online_dispatch/dump_compare.py +3 -0
  150. msprobe/pytorch/online_dispatch/utils.py +3 -0
  151. msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
  152. msprobe/pytorch/parse_tool/lib/utils.py +2 -1
  153. msprobe/pytorch/pt_config.py +30 -29
  154. msprobe/pytorch/service.py +114 -32
  155. msprobe/visualization/builder/graph_builder.py +75 -10
  156. msprobe/visualization/builder/msprobe_adapter.py +7 -6
  157. msprobe/visualization/compare/graph_comparator.py +42 -38
  158. msprobe/visualization/compare/mode_adapter.py +0 -19
  159. msprobe/visualization/graph/base_node.py +11 -3
  160. msprobe/visualization/graph/distributed_analyzer.py +71 -3
  161. msprobe/visualization/graph/graph.py +0 -11
  162. msprobe/visualization/graph/node_op.py +4 -3
  163. msprobe/visualization/graph_service.py +4 -5
  164. msprobe/visualization/utils.py +12 -35
  165. msprobe/mindspore/dump/hook_cell/api_registry.py +0 -205
  166. msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
  167. msprobe/pytorch/hook_module/api_registry.py +0 -166
  168. msprobe/pytorch/hook_module/wrap_distributed.py +0 -75
  169. msprobe/pytorch/hook_module/wrap_functional.py +0 -66
  170. msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
  171. msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
  172. msprobe/pytorch/hook_module/wrap_torch.py +0 -84
  173. msprobe/pytorch/hook_module/wrap_vf.py +0 -60
  174. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/LICENSE +0 -0
  175. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/WHEEL +0 -0
  176. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/entry_points.txt +0 -0
  177. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/top_level.txt +0 -0
msprobe/mindspore/debugger/precision_debugger.py
@@ -22,12 +22,12 @@ from mindspore._c_expression import MSContext
  from msprobe.core.common.const import Const, FileCheckConst, MsgConst
  from msprobe.core.common.exceptions import MsprobeException
  from msprobe.core.common.file_utils import FileChecker
- from msprobe.core.common.utils import get_real_step_or_rank
+ from msprobe.core.common.utils import get_real_step_or_rank, check_init_step
  from msprobe.mindspore.cell_processor import CellProcessor
  from msprobe.mindspore.common.const import Const as MsConst
- from msprobe.mindspore.common.utils import set_register_backward_hook_functions
+ from msprobe.mindspore.common.utils import set_register_backward_hook_functions, check_save_param
  from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
- from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+ from msprobe.mindspore.dump.hook_cell.api_register import get_api_register
  from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
  from msprobe.mindspore.grad_probe.grad_monitor import GradientMonitor
  from msprobe.mindspore.ms_config import parse_json_config
@@ -84,11 +84,12 @@ class PrecisionDebugger:
          common_config.dump_path = dump_path if dump_path else common_config.dump_path
          self.config = DebuggerConfig(common_config, task_config)

-         if _msprobe_c:
+         if self._need_msprobe_c() and _msprobe_c:
              _msprobe_c._PrecisionDebugger(framework="MindSpore", config_path=config_path)

          self.config.execution_mode = self._get_execution_mode()
          if self._need_service():
+             self.config.check_config_with_l2()
              self.service = Service(self.config)

          Runtime.step_count = 0
@@ -139,18 +140,18 @@ class PrecisionDebugger:
      def _is_graph_dump(config):
          if config.level != MsConst.KERNEL:
              return False
-         if not config.list or len(config.list) > 1:
+         if not config.list:
              return True
-         if '-' in config.list[0] or '/' in config.list[0]:
-             return True
-         return False
+         is_graph = any(item.startswith("name-regex") for item in config.list)
+         is_graph |= all("." not in item for item in config.list)
+         return is_graph

      @classmethod
      def start(cls, model=None):
          instance = cls._instance
          if not instance:
              raise Exception(MsgConst.NOT_CREATED_INSTANCE)
-         if _msprobe_c:
+         if cls._need_msprobe_c() and _msprobe_c:
              _msprobe_c._PrecisionDebugger().start()
          if instance.task in PrecisionDebugger.task_not_need_service:
              return
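Note: the reworked _is_graph_dump heuristic above can be exercised on its own. A minimal sketch, assuming config.list carries either dotted API names or "name-regex(...)" specifiers (the sample values below are illustrative, not taken from the package):

    # Mirrors the heuristic shown in the diff; not the msprobe implementation itself.
    def is_graph_dump(dump_list):
        if not dump_list:
            return True
        is_graph = any(item.startswith("name-regex") for item in dump_list)
        is_graph |= all("." not in item for item in dump_list)
        return is_graph

    print(is_graph_dump([]))                               # True: an empty list is treated as graph dump
    print(is_graph_dump(["name-regex(Conv.*)"]))           # True: a regex specifier is treated as graph dump
    print(is_graph_dump(["Functional.conv2d.0.forward"]))  # False: dotted names are not treated as graph dump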
@@ -162,7 +163,7 @@ class PrecisionDebugger:
              instance.service.start(model)
          else:
              if not instance.first_start:
-                 api_register.api_set_ori_func()
+                 get_api_register().restore_all_api()
              handler = TaskHandlerFactory.create(instance.config)
              handler.handle()

@@ -179,8 +180,6 @@ class PrecisionDebugger:
          instance = cls._instance
          if not instance:
              raise Exception(MsgConst.NOT_CREATED_INSTANCE)
-         if _msprobe_c:
-             _msprobe_c._PrecisionDebugger().stop()
          if instance.task == Const.GRAD_PROBE:
              instance.gm.stop()
          if instance.task in PrecisionDebugger.task_not_need_service:
@@ -194,8 +193,6 @@ class PrecisionDebugger:
          instance = cls._instance
          if not instance:
              raise Exception(MsgConst.NOT_CREATED_INSTANCE)
-         if _msprobe_c:
-             _msprobe_c._PrecisionDebugger().step()
          if instance.task in PrecisionDebugger.task_not_need_service:
              return
          if instance.service:
@@ -214,6 +211,33 @@ class PrecisionDebugger:
              return
          instance.gm.monitor(opt)

+     @classmethod
+     def save(cls, variable, name, save_backward=True):
+         instance = cls._instance
+         if not instance:
+             raise Exception(MsgConst.NOT_CREATED_INSTANCE)
+         if instance.task not in [Const.TENSOR, Const.STATISTICS] or instance.config.level_ori != Const.LEVEL_DEBUG:
+             return
+         try:
+             check_save_param(variable, name, save_backward)
+         except ValueError:
+             return
+
+         instance.config.execution_mode = cls._get_execution_mode()
+         if cls._need_service():
+             if not instance.service:
+                 instance.service = Service(instance.config)
+             instance.service.save(variable, name, save_backward)
+
+     @classmethod
+     def set_init_step(cls, step):
+         instance = cls._instance
+         if not instance:
+             raise Exception(MsgConst.NOT_CREATED_INSTANCE)
+         check_init_step(step)
+         instance.service.init_step = step
+         instance.service.loop = 0
+
      @classmethod
      def _need_service(cls):
          instance = cls._instance
@@ -223,3 +247,10 @@ class PrecisionDebugger:
              return False
          else:
              return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)
+
+     @classmethod
+     def _need_msprobe_c(cls):
+         instance = cls._instance
+         if not instance:
+             raise Exception(MsgConst.NOT_CREATED_INSTANCE)
+         return instance.config.level_ori == Const.LEVEL_L2
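Note: together with check_save_param/check_init_step, the new save and set_init_step entry points make debug-level dumping callable from training code. A hedged usage sketch, assuming a config.json with task "statistics" and level "debug" (the tensor and step values below are illustrative):

    import mindspore as ms
    from msprobe.mindspore import PrecisionDebugger

    debugger = PrecisionDebugger(config_path="./config.json")
    debugger.set_init_step(100)            # resume step numbering, e.g. after loading a checkpoint

    loss = ms.Tensor(0.5, ms.float32)      # stand-in for a real training loss
    PrecisionDebugger.save(loss, "loss", save_backward=True)
    PrecisionDebugger.step()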
msprobe/mindspore/dump/dump_tool_factory.py
@@ -1,4 +1,4 @@
- # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+ # Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
  # All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +14,7 @@
  # limitations under the License.

  from msprobe.mindspore.common.const import Const
+ from msprobe.core.common.log import logger
  from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
  from msprobe.mindspore.dump.kernel_graph_dump import KernelGraphDump
  from msprobe.mindspore.dump.kernel_kbyk_dump import KernelKbykDump
@@ -47,6 +48,7 @@ class DumpToolFactory:
              raise Exception("Valid level is needed.")
          tool = tool.get(config.execution_mode)
          if not tool:
-             raise Exception(f"Data dump is not supported in {config.execution_mode} mode "
-                             f"when dump level is {config.level}.")
+             logger.error(f"Data dump is not supported in {config.execution_mode} mode "
+                          f"when dump level is {config.level}.")
+             raise ValueError
          return tool(config)
msprobe/mindspore/dump/hook_cell/api_register.py (new file)
@@ -0,0 +1,142 @@
+ # Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import os
+
+ from mindspore import Tensor, ops, mint
+ from mindspore.mint.nn import functional
+ from mindspore.communication import comm_func
+
+ from msprobe.core.common.file_utils import load_yaml
+ from msprobe.core.common.utils import Const
+ from msprobe.core.data_dump.api_registry import ApiRegistry
+ from msprobe.mindspore.common.const import Const as MsConst
+ from msprobe.mindspore.common.utils import is_mindtorch
+ from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
+
+
+ stub_tensor_existed = True
+ try:
+     from mindspore.common._stub_tensor import StubTensor
+ except ImportError:
+     stub_tensor_existed = False
+
+ cur_path = os.path.dirname(os.path.realpath(__file__))
+ if not is_mindtorch():
+     _api_types = {
+         Const.MS_FRAMEWORK: {
+             Const.MS_API_TYPE_OPS: (ops, (ops,)),
+             Const.MS_API_TYPE_TENSOR: (Tensor, (Tensor,)),
+             Const.MS_API_TYPE_MINT: (mint, (mint,)),
+             Const.MS_API_TYPE_MINT_FUNC: (functional, (functional,)),
+             Const.MS_API_TYPE_COM: (comm_func, (comm_func,))
+         }
+     }
+     if stub_tensor_existed:
+         _api_types.get(Const.MS_FRAMEWORK).update(
+             {Const.MS_API_TYPE_STUB_TENSOR: (StubTensor, (StubTensor,))}
+         )
+
+     _supported_api_list_path = (os.path.join(cur_path, MsConst.SUPPORTED_API_LIST_FILE),)
+ else:
+     import torch
+     import torch_npu
+     _api_types = {
+         Const.MT_FRAMEWORK: {
+             Const.PT_API_TYPE_FUNCTIONAL: (torch.nn.functional, (torch.nn.functional,)),
+             Const.PT_API_TYPE_TENSOR: (torch.Tensor, (torch.Tensor,)),
+             Const.PT_API_TYPE_TORCH: (torch, (torch,)),
+             Const.PT_API_TYPE_NPU: (torch_npu, (torch_npu,)),
+             Const.PT_API_TYPE_DIST: (torch.distributed, (torch.distributed, torch.distributed.distributed_c10d))
+         }
+     }
+     _supported_api_list_path = (os.path.join(cur_path, '../../../pytorch/hook_module',
+                                              MsConst.SUPPORTED_API_LIST_FILE),)
+
+ _inner_used_api = {
+     Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_OPS: (
+         ops, "norm", "square", "sqrt", "is_complex", "stack", "is_floating_point"
+     ),
+     Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_TENSOR: (
+         Tensor, "to", "numel"
+     ),
+     Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_MINT: (
+         mint, "max", "min", "mean", "norm"
+     )
+ }
+
+
+ class ApiTemplate(HOOKCell):
+     def __init__(self, api_name, api_func, prefix, hook_build_func):
+         self.api_name = api_name
+         self.api_func = api_func
+         self.prefix_api_name = prefix + Const.SEP + str(api_name.split(Const.SEP)[-1]) + Const.SEP
+         super().__init__(hook_build_func)
+
+     @staticmethod
+     def async_to_sync(output):
+         # Fake handle, used to return after the CommHandle executes the wait method
+         fake_handle = type("FakeHandle", (), {"wait": lambda self: None})()
+         if isinstance(output, tuple) and len(output) == 2 and hasattr(output[1], "wait"):
+             output[1].wait()
+             output = (output[0], fake_handle)
+         elif hasattr(output, "wait"):
+             output.wait()
+             output = fake_handle
+         return output
+
+     def construct(self, *args, **kwargs):
+         if self.api_name.startswith(MsConst.DROPOUT_API_NAME_PREFIX):
+             return args[0] if args else kwargs.get(Const.INPUT)
+
+         output = self.api_func(*args, **kwargs)
+
+         if self.prefix_api_name.startswith(MsConst.DISTRIBUTED_DATA_PREFIX):
+             if kwargs.get("async_op") or self.api_name in ["isend", "irecv"]:
+                 output = self.async_to_sync(output)
+         return output
+
+     def forward(self, *args, **kwargs):
+         if self.api_name.startswith(MsConst.DROPOUT_API_NAME_PREFIX):
+             return args[0] if args else kwargs.get(Const.INPUT)
+         return self.api_func(*args, **kwargs)
+
+
+ api_register = None
+ stub_tensor_set = False
+
+
+ def get_api_register(return_new=False):
+     global stub_tensor_set
+
+     def stub_method(method):
+         def wrapped_method(*args, **kwargs):
+             return method(*args, **kwargs)
+         return wrapped_method
+     if not is_mindtorch() and stub_tensor_existed and not stub_tensor_set:
+         api_names = load_yaml(_supported_api_list_path[0]).get(Const.MS_API_TYPE_TENSOR, [])
+         for attr_name in dir(StubTensor):
+             attr = getattr(StubTensor, attr_name)
+             if attr_name in api_names and callable(attr):
+                 setattr(StubTensor, attr_name, stub_method(attr))
+         stub_tensor_set = True
+
+     if return_new:
+         return ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate)
+
+     global api_register
+     if api_register is None:
+         api_register = ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate)
+     return api_register
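Note: call sites elsewhere in this release (precision_debugger.py, jit_dump.py) drive the registry through get_api_register(). Only get_api_register, register_all_api and restore_all_api are taken from this diff; the surrounding flow below is illustrative:

    from msprobe.mindspore.dump.hook_cell.api_register import get_api_register

    registry = get_api_register()
    registry.register_all_api()      # swap the supported MindSpore APIs for hooked ApiTemplate wrappers
    try:
        pass                         # run the network or jit function under dump here
    finally:
        registry.restore_all_api()   # always put the original functions back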
msprobe/mindspore/dump/hook_cell/hook_cell.py
@@ -28,23 +28,22 @@ def get_cell_count(name):
          return HOOKCell.cell_count[name]


-     def __init__(self, build_hook) -> None:
+     def __init__(self, hook_build_func) -> None:
          super(HOOKCell, self).__init__()
          self.changed_status = False
          self.input_kwargs = {}
-         self.prefix = ""
          if not HOOKCell.g_stop_hook:
              HOOKCell.g_stop_hook = True
              self.changed_status = True
-             if hasattr(self, "prefix_api_name"):
-                 self.prefix = self.prefix_api_name
-
              self.forward_data_collected = False
-             forward_pre_hook, forward_hook, backward_hook, backward_pre_hook = build_hook(self.prefix)
-             self.register_forward_pre_hook(forward_pre_hook)
-             self.register_forward_hook(forward_hook)
-             register_backward_hook_functions["full"](self, backward_hook)
-             register_backward_hook_functions["pre"](self, backward_pre_hook)
+
+             prefix = self.prefix_api_name if hasattr(self, "prefix_api_name") else ""
+             if callable(hook_build_func):
+                 forward_pre_hook, forward_hook, backward_hook, backward_pre_hook = hook_build_func(prefix)
+                 self.register_forward_pre_hook(forward_pre_hook)
+                 self.register_forward_hook(forward_hook)
+                 register_backward_hook_functions["full"](self, backward_hook)
+                 register_backward_hook_functions["pre"](self, backward_pre_hook)


      # Override __call__ and set the global flag.
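Note: HOOKCell now takes the hook builder directly and skips registration when it is not callable. A hedged sketch of the builder contract, with placeholder hooks (the hook signatures follow MindSpore's Cell hook API and are an assumption here, not part of the diff):

    def hook_build_func(prefix):
        # Must return four hooks, in this order:
        # forward_pre_hook, forward_hook, backward_hook, backward_pre_hook.
        def forward_pre_hook(cell, args):
            print(f"{prefix}forward pre-hook: {len(args)} positional inputs")

        def forward_hook(cell, args, output):
            print(f"{prefix}forward hook: output type {type(output).__name__}")

        def backward_hook(cell, grad_input, grad_output):
            print(f"{prefix}backward hook")

        def backward_pre_hook(cell, grad_output):
            print(f"{prefix}backward pre-hook")

        return forward_pre_hook, forward_hook, backward_hook, backward_pre_hook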
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml
@@ -564,15 +564,15 @@ tensor:
  - all
  - amax
  - amin
+ - angle
  - any
  - arccos
  - arccosh
- - argmax
- - angle
  - arcsin
  - arcsinh
  - arctan
  - arctanh
+ - argmax
  - argmin
  - argsort
  - asin
@@ -582,19 +582,23 @@ tensor:
  - atanh
  - baddbmm
  - bernoulli
+ - bfloat16
  - bincount
  - bitwise_and
  - bitwise_or
  - bitwise_xor
  - bmm
  - bool
+ - bool astype
  - broadcast_to
+ - byte
  - ceil
- - cholesky_solve
  - cholesky
+ - cholesky_solve
  - clamp
  - clip
  - conj
+ - copy
  - copysign
  - cos
  - cosh
@@ -606,11 +610,13 @@ tensor:
  - deg2rad
  - diag
  - diagflat
+ - diagonal
  - diff
  - digamma
  - div
  - div_
  - divide
+ - double
  - equal
  - erf
  - erfc
@@ -618,13 +624,16 @@ tensor:
  - exp
  - expand_as
  - expm1
+ - flatten
  - flip
  - fliplr
  - flipud
+ - float
  - float_power
  - floor
  - fmod
  - frac
+ - from_numpy
  - gather_elements
  - ge
  - geqrf
@@ -648,12 +657,12 @@ tensor:
  - inner
  - int
  - inverse
+ - is_complex
+ - is_signed
  - isclose
  - isfinite
  - isinf
  - isnan
- - is_complex
- - is_signed
  - isneginf
  - isposinf
  - isreal
@@ -704,28 +713,27 @@ tensor:
  - new_ones
  - new_zeros
  - nextafter
- - norm
  - nonzero
+ - norm
  - not_equal
  - ormqr
  - permute
  - pow
  - prod
  - qr
+ - rad2deg
  - ravel
  - real
  - reciprocal
  - remainder
  - renorm
- - rad2deg
- - tile
  - repeat_interleave
  - reshape
  - reshape
- - round
+ - resize
  - rot90
+ - round
  - rsqrt
- - sum_to_size
  - scatter
  - sgn
  - short
@@ -745,7 +753,8 @@ tensor:
  - sub
  - sub_
  - subtract
- - subtract
+ - sum
+ - sum_to_size
  - svd
  - swapaxes
  - swapdims
@@ -753,13 +762,13 @@ tensor:
  - take
  - tan
  - tanh
- - trace
- - swapaxes
+ - tensor_split
  - tile
+ - to
  - topk
- - tril
- - tensor_split
+ - trace
  - transpose
+ - tril
  - true_divide
  - trunc
  - unbind
@@ -769,17 +778,6 @@ tensor:
  - view
  - where
  - xlogy
- - from_numpy
- - std
- - take
- - var
- - all
- - any
- - copy
- - diagonal
- - flatten
- - resize
- - sum

  mint.ops:
  - abs
msprobe/mindspore/dump/jit_dump.py
@@ -16,15 +16,20 @@
  import os
  from collections import defaultdict

- from mindspore import Tensor
+ import mindspore
  from mindspore._c_expression import PyNativeExecutor_
- from mindspore.common.api import _MindsporeFunctionExecutor
+ try:
+     from mindspore.common.api import _MindsporeFunctionExecutor
+ except ImportError:
+     from mindspore.common.api import _JitExecutor as _MindsporeFunctionExecutor

  from msprobe.core.common.log import logger
- from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs
  from msprobe.core.common.const import Const
- from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs
- from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+ from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs
+ from msprobe.mindspore.dump.hook_cell.api_register import get_api_register
+
+
+ _api_register = get_api_register()


  def dump_jit(name, in_feat, out_feat, is_forward):
@@ -40,8 +45,8 @@ def dump_jit(name, in_feat, out_feat, is_forward):
      if JitDump.need_dump():
          if is_forward:
              JitDump.jit_count[result] += 1
-             name_template = Const.JIT + Const.SEP + result + Const.SEP + str(JitDump.jit_count[result]) + Const.SEP + \
-                             Const.FORWARD
+             name_template = (Const.JIT + Const.SEP + result + Const.SEP +
+                              str(JitDump.jit_count[result]) + Const.SEP + Const.FORWARD)
              JitDump.data_collector.update_api_or_module_name(name_template)
              module_input_output = ModuleForwardInputsOutputs(args=in_feat, kwargs={}, output=out_feat)
              JitDump.data_collector.forward_data_collect(name_template, None, pid, module_input_output)
@@ -56,7 +61,7 @@
  class JitDump(_MindsporeFunctionExecutor):
      dump_config = None
      jit_enable = False
-     jit_dump_switch = True
+     jit_dump_switch = False
      jit_count = defaultdict(int)

      def __init__(self, *args, **kwargs):
@@ -67,8 +72,7 @@ class JitDump(_MindsporeFunctionExecutor):
          self._executor = PyNativeExecutor_.get_instance()

      def __call__(self, *args, **kwargs):
-         if JitDump.jit_dump_switch:
-             api_register.api_set_ori_func()
+         _api_register.restore_all_api()
          out = super().__call__(*args, **kwargs)
          if JitDump.jit_dump_switch and len(args) > 0:
              if self.name and self.name != "construct":
@@ -78,8 +82,7 @@
                  JitDump.jit_enable = True
          elif len(args) == 0:
              logger.warning(f"The jit function {self.name} has no input arguments, nothing will be dumped.")
-         if JitDump.jit_dump_switch:
-             api_register.api_set_hook_func()
+         _api_register.register_all_api()
          return out

      @classmethod
@@ -100,9 +103,12 @@

      def grad(self, obj, grad, weights, grad_position, *args, **kwargs):
          if JitDump.jit_dump_switch and JitDump.jit_enable:
-             api_register.api_set_ori_func()
-         output = self._executor.grad(grad, obj, weights, grad_position, *args, *(kwargs.values()))
+             _api_register.restore_all_api()
+         if mindspore.__version__ >= "2.5":
+             output = self._executor.grad(grad, obj, weights, grad_position, False, *args, *(kwargs.values()))
+         else:
+             output = self._executor.grad(grad, obj, weights, grad_position, *args, *(kwargs.values()))
          if JitDump.jit_dump_switch and JitDump.jit_enable:
              dump_jit(obj, args, None, False)
-             api_register.api_set_hook_func()
+             _api_register.register_all_api()
          return output
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc
@@ -18,37 +18,10 @@
  #include <sys/stat.h>
  #include <cstdlib>
  #include <cstring>
+ #include <pybind11/embed.h>
  #include "utils/log_adapter.h"

- namespace {
-
- // Utility function to check if a file path is valid
- bool IsValidPath(const std::string &path) {
-   struct stat fileStat;
-   if (stat(path.c_str(), &fileStat) != 0) {
-     MS_LOG(ERROR) << "File does not exist or cannot be accessed: " << path;
-     return false;
-   }
-
-   if (S_ISLNK(fileStat.st_mode)) {
-     MS_LOG(ERROR) << "File is a symbolic link, which is not allowed: " << path;
-     return false;
-   }
-
-   if (!S_ISREG(fileStat.st_mode)) {
-     MS_LOG(ERROR) << "File is not a regular file: " << path;
-     return false;
-   }
-
-   if (path.substr(path.find_last_of(".")) != ".so") {
-     MS_LOG(ERROR) << "File is not a .so file: " << path;
-     return false;
-   }
-
-   return true;
- }
-
- }  // namespace
+ namespace py = pybind11;

  HookDynamicLoader &HookDynamicLoader::GetInstance() {
    static HookDynamicLoader instance;
@@ -65,38 +38,31 @@ bool HookDynamicLoader::loadFunction(void *handle, const std::string &functionNa
    return true;
  }

- bool HookDynamicLoader::validateLibraryPath(const std::string &libPath) {
-   char *realPath = realpath(libPath.c_str(), nullptr);
-   if (!realPath) {
-     MS_LOG(WARNING) << "Failed to resolve realpath for the library: " << libPath;
-     return false;
-   }
-
-   bool isValid = IsValidPath(realPath);
-   free(realPath);  // Free memory allocated by realpath
-   return isValid;
- }
-
  bool HookDynamicLoader::LoadLibrary() {
-   const char *libPath = std::getenv("HOOK_TOOL_PATH");
-   if (!libPath) {
-     MS_LOG(WARNING) << "HOOK_TOOL_PATH is not set!";
-     return false;
-   }
-
-   std::string resolvedLibPath(libPath);
-   if (!validateLibraryPath(resolvedLibPath)) {
-     MS_LOG(WARNING) << "Library path validation failed.";
-     return false;
-   }
-
+   std::string msprobePath = "";
+   // Acquire the GIL
+   py::gil_scoped_acquire acquire;
+   try {
+     py::module msprobeMod = py::module::import("msprobe.lib._msprobe_c");
+     if (!py::hasattr(msprobeMod, "__file__")) {
+       MS_LOG(WARNING) << "Adump mod not found";
+       return false;
+     }
+     msprobePath = msprobeMod.attr("__file__").cast<std::string>();
+   } catch (const std::exception& e) {
+     MS_LOG(WARNING) << "Adump mod path unable to get: " << e.what();
+     return false;
+   }
    std::lock_guard<std::mutex> lock(mutex_);
    if (handle_) {
      MS_LOG(WARNING) << "Hook library already loaded!";
      return false;
    }
-
-   handle_ = dlopen(resolvedLibPath.c_str(), RTLD_LAZY | RTLD_LOCAL);
+   if (msprobePath == "") {
+     MS_LOG(WARNING) << "Adump path not loaded";
+     return false;
+   }
+   handle_ = dlopen(msprobePath.c_str(), RTLD_LAZY | RTLD_LOCAL);
    if (!handle_) {
      MS_LOG(WARNING) << "Failed to load Hook library: " << dlerror();
      return false;
@@ -104,7 +70,7 @@ bool HookDynamicLoader::LoadLibrary() {

    for (const auto &functionName : functionList_) {
      if (!loadFunction(handle_, functionName)) {
-       MS_LOG(WARNING) << "Failed to load function: " << functionName;
+       MS_LOG(WARNING) << "Failed to load adump function";
        dlclose(handle_);
        handle_ = nullptr;
        return false;
msprobe/mindspore/dym_loader/hook_dynamic_loader.h
@@ -40,7 +40,6 @@ class HookDynamicLoader {
   private:
    // Helper functions
    bool loadFunction(void *handle, const std::string &functionName);
-   bool validateLibraryPath(const std::string &libPath);

    HookDynamicLoader() = default;
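Note: the loader now resolves the hook library through the embedded Python interpreter instead of the HOOK_TOOL_PATH environment variable. Roughly, the C++ code above resolves the same path as this Python sketch (the module name is taken from the diff; the rest is illustrative):

    import importlib

    mod = importlib.import_module("msprobe.lib._msprobe_c")
    lib_path = getattr(mod, "__file__", None)
    if not lib_path:
        raise RuntimeError("msprobe.lib._msprobe_c has no __file__")
    print(lib_path)   # the shared object the loader then dlopen()s with RTLD_LAZY | RTLD_LOCAL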