PyPI - mindstudio-probe - Versions diffs - 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl - Mend

mindstudio-probe 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/METADATA +1 -1
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/RECORD +85 -66
msprobe/README.md +2 -2
msprobe/core/common/const.py +34 -9
msprobe/core/common/inplace_ops.yaml +1 -0
msprobe/core/common/utils.py +14 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +1 -1
msprobe/core/compare/merge_result/merge_result.py +8 -7
msprobe/core/compare/merge_result/utils.py +81 -0
msprobe/core/compare/utils.py +10 -0
msprobe/core/data_dump/data_collector.py +58 -13
msprobe/core/data_dump/data_processor/base.py +92 -8
msprobe/core/data_dump/data_processor/factory.py +3 -0
msprobe/core/data_dump/data_processor/mindspore_processor.py +17 -4
msprobe/core/data_dump/data_processor/pytorch_processor.py +58 -7
msprobe/core/data_dump/json_writer.py +26 -8
msprobe/docs/01.installation.md +25 -0
msprobe/docs/02.config_introduction.md +14 -12
msprobe/docs/03.config_examples.md +24 -0
msprobe/docs/05.data_dump_PyTorch.md +34 -15
msprobe/docs/06.data_dump_MindSpore.md +45 -22
msprobe/docs/09.accuracy_checker_MindSpore.md +4 -2
msprobe/docs/19.monitor.md +257 -260
msprobe/docs/21.visualization_PyTorch.md +10 -0
msprobe/docs/22.visualization_MindSpore.md +11 -0
msprobe/docs/27.dump_json_instruction.md +24 -20
msprobe/docs/28.debugger_save_instruction.md +94 -0
msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
msprobe/docs/img/monitor/step_count_per_record.png +0 -0
msprobe/mindspore/__init__.py +1 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +26 -6
msprobe/mindspore/api_accuracy_checker/api_runner.py +54 -16
msprobe/mindspore/api_accuracy_checker/compute_element.py +47 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +129 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
msprobe/mindspore/common/utils.py +20 -2
msprobe/mindspore/debugger/debugger_config.py +25 -2
msprobe/mindspore/debugger/precision_debugger.py +25 -6
msprobe/mindspore/dump/hook_cell/api_registry.py +2 -0
msprobe/mindspore/dump/jit_dump.py +7 -6
msprobe/mindspore/monitor/anomaly_detect.py +404 -0
msprobe/mindspore/monitor/distributed/__init__.py +0 -0
msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
msprobe/mindspore/monitor/features.py +63 -0
msprobe/mindspore/monitor/module_hook.py +821 -0
msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
msprobe/mindspore/monitor/utils.py +267 -0
msprobe/mindspore/ms_config.py +8 -2
msprobe/mindspore/service.py +95 -21
msprobe/pytorch/__init__.py +0 -1
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +1 -1
msprobe/pytorch/bench_functions/apply_adam.py +215 -0
msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
msprobe/pytorch/bench_functions/mish.py +21 -0
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +44 -0
msprobe/pytorch/bench_functions/sort_v2.py +21 -0
msprobe/pytorch/common/utils.py +71 -0
msprobe/pytorch/debugger/debugger_config.py +19 -9
msprobe/pytorch/debugger/precision_debugger.py +14 -0
msprobe/pytorch/dump/module_dump/module_processer.py +10 -30
msprobe/pytorch/function_factory.py +7 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +2 -1
msprobe/pytorch/hook_module/wrap_distributed.py +4 -0
msprobe/pytorch/monitor/anomaly_detect.py +14 -29
msprobe/pytorch/monitor/csv2tb.py +10 -12
msprobe/pytorch/monitor/module_hook.py +123 -104
msprobe/pytorch/monitor/module_metric.py +6 -6
msprobe/pytorch/monitor/optimizer_collect.py +45 -63
msprobe/pytorch/monitor/utils.py +8 -43
msprobe/pytorch/pt_config.py +19 -22
msprobe/pytorch/service.py +103 -24
msprobe/visualization/builder/graph_builder.py +31 -5
msprobe/visualization/builder/msprobe_adapter.py +7 -5
msprobe/visualization/graph/base_node.py +3 -2
msprobe/visualization/graph/distributed_analyzer.py +80 -3
msprobe/visualization/graph/node_op.py +4 -2
msprobe/visualization/graph_service.py +3 -4
msprobe/visualization/utils.py +10 -2
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/top_level.txt +0 -0

msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py ADDED Viewed

@@ -0,0 +1,129 @@
+# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import gc
+import sys
+from pathlib import Path
+import mindspore
+from msprobe.mindspore.common.log import logger
+from msprobe.core.common.const import Const, CompareConst, MsCompareConst
+import torch as mindtorch
+from torch import Tensor as mindtorch_tensor
+import torch.nn.functional as mindtorch_func
+import torch.distributed as mindtorch_dist
+is_valid_pt_mt_env = True
+def is_mindtorch():
+    """Report whether the currently importable ``torch`` package is MindTorch.
+
+    A scalar tensor is created through ``torch.tensor`` and checked against
+    ``mindspore.Tensor``.
+
+    Returns:
+        bool: True when the tensor produced by ``torch`` is a ``mindspore.Tensor``;
+        False otherwise, including when ``torch`` or ``mindspore`` cannot be imported.
+    """
+    try:
+        import torch as test_torch
+        from mindspore import Tensor as MindsporeTensor
+    except ImportError:
+        return False
+    probe = test_torch.tensor(0.0)
+    return isinstance(probe, MindsporeTensor)
+def remove_torch_related_paths():
+    """Remove from ``sys.path`` the directory that provides the importable MindTorch ``torch``.
+
+    Does nothing when ``is_mindtorch()`` is False, when ``torch`` cannot be imported,
+    or when the imported ``torch`` exposes no ``__file__``. Otherwise the parent of the
+    ``torch`` package directory is resolved and every ``sys.path`` entry that resolves
+    to the same directory is removed in place.
+
+    Entries are compared in resolved form, so symlinked, relative or trailing-slash
+    spellings of the directory are matched as well as the exact resolved string.
+    """
+    if not is_mindtorch():
+        return
+    try:
+        import torch as remove_torch
+        torch_file = remove_torch.__file__
+    except ImportError:
+        return
+    if not torch_file:
+        # No file location to derive a directory from (e.g. a namespace package).
+        return
+    # <dir>/torch/__init__.py -> <dir>: the entry that makes ``import torch`` resolve here.
+    target = str(Path(os.path.dirname(torch_file)).resolve().parent)
+    kept_entries = []
+    for entry in sys.path:
+        try:
+            entry_resolved = str(Path(entry).resolve())
+        except (OSError, RuntimeError, ValueError, TypeError) as error:
+            # An entry that cannot be resolved cannot be matched; keep it untouched.
+            logger.debug(f"Failed to resolve path {entry}: {error}")
+            kept_entries.append(entry)
+            continue
+        if entry_resolved != target:
+            kept_entries.append(entry)
+    # Slice assignment mutates the existing list object, as the original ``pop`` did.
+    sys.path[:] = kept_entries
+def clear_torch_from_sys_modules():
+    """Drop ``torch`` and every ``torch.*`` submodule from ``sys.modules``.
+
+    Names are collected first and deleted afterwards so that ``sys.modules`` is not
+    mutated while it is being iterated.
+    """
+    stale_names = [
+        name for name in sys.modules
+        if name == "torch" or name.startswith("torch.")
+    ]
+    for name in stale_names:
+        del sys.modules[name]
+def set_pt_mt_env_invalid():
+    """Set the module-level flag ``is_valid_pt_mt_env`` to False."""
+    # ``global`` makes the assignment rebind the module attribute instead of a local name.
+    global is_valid_pt_mt_env
+    is_valid_pt_mt_env = False
+def delete_torch_paths():
+    """Strip MindTorch from the import state until ``import torch`` stops resolving to it.
+
+    If ``torch`` is not MindTorch on entry, the environment is flagged invalid. Cached
+    ``torch`` modules are then cleared, and for at most
+    ``MsCompareConst.MAX_RECURSION_DEPTH`` rounds the ``sys.path`` entry providing
+    MindTorch is removed and the module cache cleared again. The loop stops as soon as
+    ``is_mindtorch()`` returns False.
+
+    Raises:
+        Exception: if ``torch`` still resolves to MindTorch after all rounds.
+    """
+    if not is_mindtorch():
+        set_pt_mt_env_invalid()
+    clear_torch_from_sys_modules()
+    for _ in range(MsCompareConst.MAX_RECURSION_DEPTH):
+        if not is_mindtorch():
+            break
+        remove_torch_related_paths()
+        clear_torch_from_sys_modules()
+    else:
+        # All rounds were used; fail only if the last removal did not help either.
+        if is_mindtorch():
+            # Report the same constant that bounds the loop above.
+            raise Exception(f"Please check if you have a valid PyTorch and MindTorch environment, and ensure "
+                            f"the PYTHONPATH environment variable depth does not exceed "
+                            f"{MsCompareConst.MAX_RECURSION_DEPTH}.")
+# Import-time bootstrap. ``mindtorch`` was bound by the ``import torch as mindtorch``
+# above. When that package is MindTorch, try to make a second ``import torch`` resolve
+# to a different package and bind it to the module-level name ``torch``.
+if not is_mindtorch():
+    set_pt_mt_env_invalid()
+else:
+    initial_sys_path = sys.path.copy()
+    try:
+        delete_torch_paths()
+        gc.collect()
+        import torch
+        # Still MindTorch after the path surgery: no separate torch could be loaded.
+        if is_mindtorch():
+            set_pt_mt_env_invalid()
+    finally:
+        # Always restore the search path, even if the steps above raised, so later
+        # imports in this process are not left with a truncated ``sys.path``.
+        sys.path = initial_sys_path

msprobe/mindspore/api_accuracy_checker/type_mapping.py CHANGED Viewed

@@ -15,10 +15,18 @@
 import mindspore
 import numpy as np
-import torch
 from mindspore._c_expression import typing
 from mindspore.common import dtype as mstype
+from msprobe.mindspore.api_accuracy_checker import torch_mindtorch_importer
+if torch_mindtorch_importer.is_valid_pt_mt_env:
+    from msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer import mindtorch
+    from msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer import torch
+else:
+    from msprobe.mindspore.api_accuracy_checker.torch_mindtorch_importer import mindtorch
+    import torch
 INT8 = "Int8"
 UINT8 = "UInt8"
 INT16 = "Int16"
@@ -82,6 +90,21 @@ dtype_str_to_torch_dtype = {
 }
 torch_dtype_to_dtype_str = {value: key for key, value in dtype_str_to_torch_dtype.items()}
+# Maps each dtype-name constant defined above (INT8, UINT8, ...) to the attribute of the
+# same meaning on the ``mindtorch`` module imported from torch_mindtorch_importer.
+dtype_str_to_mindtorch_dtype = {
+    INT8: mindtorch.int8,
+    UINT8: mindtorch.uint8,
+    INT16: mindtorch.int16,
+    INT32: mindtorch.int32,
+    INT64: mindtorch.int64,
+    FLOAT16: mindtorch.float16,
+    FLOAT32: mindtorch.float32,
+    FLOAT64: mindtorch.float64,
+    BOOL: mindtorch.bool,
+    BFLOAT16: mindtorch.bfloat16,
+}
+# Reverse lookup: mindtorch dtype object -> dtype-name string.
+mindtorch_dtype_to_dtype_str = {value: key for key, value in dtype_str_to_mindtorch_dtype.items()}
 MINDSPORE_TENSOR_TYPE_STR = "mindspore.Tensor"
 BOOL_TYPE_STR = "bool"
 INT_TYPE_STR = "int"

msprobe/mindspore/api_accuracy_checker/utils.py CHANGED Viewed

@@ -82,10 +82,12 @@ class GlobalContext:
     def __init__(self):
         self.is_constructed = True
         self.dump_data_dir = ""
+        self.framework = Const.MS_FRAMEWORK
-    def init(self, is_constructed, dump_data_dir):
+    def init(self, is_constructed, dump_data_dir, framework):
         self.is_constructed = is_constructed
         self.dump_data_dir = dump_data_dir
+        self.framework = framework
     def get_dump_data_dir(self):
         return self.dump_data_dir
@@ -93,5 +95,8 @@ class GlobalContext:
     def get_is_constructed(self):
         return self.is_constructed
+    def get_framework(self):
+        return self.framework
 global_context = GlobalContext()

msprobe/mindspore/common/utils.py CHANGED Viewed

@@ -151,11 +151,10 @@ def is_mindtorch():
         mindtorch_check_result = False
         try:
             import torch
-            from mindspore._c_expression import Tensor
         except ImportError:
             return mindtorch_check_result
         tensor = torch.tensor(0.0)
-        if isinstance(tensor, Tensor):
+        if isinstance(tensor, ms.Tensor):
             mindtorch_check_result = True
     return mindtorch_check_result
@@ -179,3 +178,22 @@ def set_register_backward_hook_functions():
     else:
         register_backward_hook_functions["pre"] = ms.nn.Cell.register_backward_pre_hook
         register_backward_hook_functions["full"] = ms.nn.Cell.register_backward_hook
+def check_save_param(variable, name, save_backward):
+    """Validate the argument types passed to ``PrecisionDebugger.save``.
+
+    Args:
+        variable: value to save; must be a list, dict, ``ms.Tensor``, int, float or str.
+        name: must be a str.
+        save_backward: must be a bool.
+
+    Raises:
+        ValueError: if any argument has an unsupported type. A warning with the same
+            text is logged first; callers are expected to catch the error and skip
+            the save.
+    """
+    if not isinstance(variable, (list, dict, ms.Tensor, int, float, str)):
+        message = ("PrecisionDebugger.save variable type not valid, "
+                   "should be one of list, dict, ms.Tensor, int, float or string. "
+                   "Skip current save process.")
+        logger.warning(message)
+        raise ValueError(message)
+    if not isinstance(name, str):
+        message = ("PrecisionDebugger.save name not valid, "
+                   "should be string. "
+                   "Skip current save process.")
+        logger.warning(message)
+        raise ValueError(message)
+    if not isinstance(save_backward, bool):
+        message = ("PrecisionDebugger.save save_backward not valid, "
+                   "should be bool. "
+                   "Skip current save process.")
+        logger.warning(message)
+        raise ValueError(message)

msprobe/mindspore/debugger/debugger_config.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -16,9 +16,11 @@
 import os
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.file_utils import create_directory
 from msprobe.mindspore.common.const import Const as MsConst
 from msprobe.mindspore.common.const import FreeBenchmarkConst
+from msprobe.core.common.log import logger
 class DebuggerConfig:
@@ -50,7 +52,7 @@ class DebuggerConfig:
                                  if not task_config.handler_type else task_config.handler_type)
             self.stage = FreeBenchmarkConst.DEFAULT_STAGE if not task_config.fuzz_stage else task_config.fuzz_stage
             if self.handler_type == FreeBenchmarkConst.FIX and \
-               self.pert_type != FreeBenchmarkConst.DEFAULT_PERT_TYPE:
+                    self.pert_type != FreeBenchmarkConst.DEFAULT_PERT_TYPE:
                 raise ValueError("pert_mode must be improve_precision or empty when handler_type is fix, "
                                  f"but got {self.pert_type}.")
             if self.stage == Const.BACKWARD and self.handler_type == FreeBenchmarkConst.FIX:
@@ -72,4 +74,25 @@ class DebuggerConfig:
             self.check_mode = "all"
         if not isinstance(self.async_dump, bool):
             raise Exception("The parameters async_dump should be bool.")
+        if self.async_dump and self.task == Const.TENSOR and not self.list:
+            raise Exception("The parameters async_dump is true in tensor task, the parameters list cannot be empty.")
+        if self.task == Const.STRUCTURE and self.level_ori not in [Const.LEVEL_L0, Const.LEVEL_MIX]:
+            logger.warning_on_rank_0(
+                f"When the task is set to structure, the level should be one of {[Const.LEVEL_L0, Const.LEVEL_MIX]}. "
+                f"If not, the default level is {Const.LEVEL_MIX}."
+            )
+            self.level_ori = Const.LEVEL_MIX
         return True
+    def check_config_with_l2(self):
+        """Validate the settings that must accompany level L2.
+
+        Does nothing unless ``self.level_ori`` equals ``Const.LEVEL_L2``. The checks
+        run in order and the first failure is reported.
+
+        Raises:
+            MsprobeException: with ``INVALID_PARAM_ERROR`` when the task is not tensor,
+                when a scope is configured, or when ``self.list`` does not hold
+                exactly one entry.
+        """
+        if self.level_ori != Const.LEVEL_L2:
+            return
+        reason = None
+        if self.task != Const.TENSOR:
+            reason = "When level is set to L2, the task must be set to tensor."
+        elif self.scope:
+            reason = "When level is set to L2, the scope cannot be configured."
+        elif not self.list or len(self.list) != 1:
+            reason = "When level is set to L2, the list must be configured as a list with one api name."
+        if reason is not None:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, reason)

msprobe/mindspore/debugger/precision_debugger.py CHANGED Viewed

@@ -25,7 +25,7 @@ from msprobe.core.common.file_utils import FileChecker
 from msprobe.core.common.utils import get_real_step_or_rank
 from msprobe.mindspore.cell_processor import CellProcessor
 from msprobe.mindspore.common.const import Const as MsConst
-from msprobe.mindspore.common.utils import set_register_backward_hook_functions
+from msprobe.mindspore.common.utils import set_register_backward_hook_functions, check_save_param
 from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
 from msprobe.mindspore.dump.hook_cell.api_registry import api_register
 from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
@@ -89,6 +89,7 @@ class PrecisionDebugger:
         self.config.execution_mode = self._get_execution_mode()
         if self._need_service():
+            self.config.check_config_with_l2()
             self.service = Service(self.config)
         Runtime.step_count = 0
@@ -139,11 +140,11 @@ class PrecisionDebugger:
     def _is_graph_dump(config):
         if config.level != MsConst.KERNEL:
             return False
-        if not config.list or len(config.list) > 1:
+        if not config.list:
             return True
-        if '-' in config.list[0] or '/' in config.list[0]:
-            return True
-        return False
+        is_graph = any(item.startswith("name-regex") for item in config.list)
+        is_graph |= all("." not in item for item in config.list)
+        return is_graph
     @classmethod
     def start(cls, model=None):
@@ -214,6 +215,24 @@ class PrecisionDebugger:
             return
         instance.gm.monitor(opt)
+    @classmethod
+    def save(cls, variable, name, save_backward=True):
+        """Forward ``variable`` to the service's ``save`` when the debug level is active.
+
+        Returns without doing anything when the task is not tensor/statistics, when the
+        configured level is not ``Const.LEVEL_DEBUG``, when ``check_save_param`` rejects
+        the arguments, or when ``_need_service()`` is false. The service is created on
+        first use.
+
+        Raises:
+            Exception: if no PrecisionDebugger instance has been created.
+        """
+        debugger = cls._instance
+        if not debugger:
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
+        task_supported = debugger.task in [Const.TENSOR, Const.STATISTICS]
+        if not task_supported or debugger.config.level_ori != Const.LEVEL_DEBUG:
+            return
+        try:
+            check_save_param(variable, name, save_backward)
+        except ValueError:
+            # Invalid arguments were already reported by check_save_param; skip the save.
+            return
+        debugger.config.execution_mode = cls._get_execution_mode()
+        if not cls._need_service():
+            return
+        if not debugger.service:
+            debugger.service = Service(debugger.config)
+        debugger.service.save(variable, name, save_backward)
     @classmethod
     def _need_service(cls):
         instance = cls._instance
@@ -222,4 +241,4 @@ class PrecisionDebugger:
         if instance.config.execution_mode != MsConst.PYNATIVE_MODE:
             return False
         else:
-            return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)
+            return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)

msprobe/mindspore/dump/hook_cell/api_registry.py CHANGED Viewed

@@ -106,6 +106,7 @@ class ApiRegistry:
             self.set_api_attr(torch.Tensor, self.torch_tensor_hook_attr)
             self.set_api_attr(torch.nn.functional, self.torch_functional_hook_attr)
             self.set_api_attr(torch.distributed, self.torch_distributed_hook_attr)
+            self.set_api_attr(torch.distributed.distributed_c10d, self.torch_distributed_hook_attr)
             self.set_api_attr(torch_npu, self.torch_npu_hook_attr)
         else:
             self.set_api_attr(Tensor, self.tensor_hook_attr)
@@ -121,6 +122,7 @@ class ApiRegistry:
             self.set_api_attr(torch.Tensor, self.torch_tensor_ori_attr)
             self.set_api_attr(torch.nn.functional, self.torch_functional_ori_attr)
             self.set_api_attr(torch.distributed, self.torch_distributed_ori_attr)
+            self.set_api_attr(torch.distributed.distributed_c10d, self.torch_distributed_ori_attr)
             self.set_api_attr(torch_npu, self.torch_npu_ori_attr)
         else:
             self.set_api_attr(Tensor, self.tensor_ori_attr)

msprobe/mindspore/dump/jit_dump.py CHANGED Viewed

@@ -16,14 +16,15 @@
 import os
 from collections import defaultdict
-from mindspore import Tensor
 from mindspore._c_expression import PyNativeExecutor_
-from mindspore.common.api import _MindsporeFunctionExecutor
+try:
+    from mindspore.common.api import _MindsporeFunctionExecutor
+except ImportError:
+    from mindspore.common.api import _JitExecutor as _MindsporeFunctionExecutor
 from msprobe.core.common.log import logger
-from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs
 from msprobe.core.common.const import Const
-from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs
+from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs
 from msprobe.mindspore.dump.hook_cell.api_registry import api_register
@@ -40,8 +41,8 @@ def dump_jit(name, in_feat, out_feat, is_forward):
     if JitDump.need_dump():
         if is_forward:
             JitDump.jit_count[result] += 1
-            name_template = Const.JIT + Const.SEP + result + Const.SEP + str(JitDump.jit_count[result]) + Const.SEP + \
-                            Const.FORWARD
+            name_template = (Const.JIT + Const.SEP + result + Const.SEP +
+                             str(JitDump.jit_count[result]) + Const.SEP + Const.FORWARD)
             JitDump.data_collector.update_api_or_module_name(name_template)
             module_input_output = ModuleForwardInputsOutputs(args=in_feat, kwargs={}, output=out_feat)
             JitDump.data_collector.forward_data_collect(name_template, None, pid, module_input_output)

mindstudio-probe 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

mindstudio-probe 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl