mindstudio-probe 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/METADATA +1 -1
- {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/RECORD +85 -66
- msprobe/README.md +2 -2
- msprobe/core/common/const.py +34 -9
- msprobe/core/common/inplace_ops.yaml +1 -0
- msprobe/core/common/utils.py +14 -0
- msprobe/core/compare/layer_mapping/data_scope_parser.py +1 -1
- msprobe/core/compare/merge_result/merge_result.py +8 -7
- msprobe/core/compare/merge_result/utils.py +81 -0
- msprobe/core/compare/utils.py +10 -0
- msprobe/core/data_dump/data_collector.py +58 -13
- msprobe/core/data_dump/data_processor/base.py +92 -8
- msprobe/core/data_dump/data_processor/factory.py +3 -0
- msprobe/core/data_dump/data_processor/mindspore_processor.py +17 -4
- msprobe/core/data_dump/data_processor/pytorch_processor.py +58 -7
- msprobe/core/data_dump/json_writer.py +26 -8
- msprobe/docs/01.installation.md +25 -0
- msprobe/docs/02.config_introduction.md +14 -12
- msprobe/docs/03.config_examples.md +24 -0
- msprobe/docs/05.data_dump_PyTorch.md +34 -15
- msprobe/docs/06.data_dump_MindSpore.md +45 -22
- msprobe/docs/09.accuracy_checker_MindSpore.md +4 -2
- msprobe/docs/19.monitor.md +257 -260
- msprobe/docs/21.visualization_PyTorch.md +10 -0
- msprobe/docs/22.visualization_MindSpore.md +11 -0
- msprobe/docs/27.dump_json_instruction.md +24 -20
- msprobe/docs/28.debugger_save_instruction.md +94 -0
- msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
- msprobe/docs/img/monitor/step_count_per_record.png +0 -0
- msprobe/mindspore/__init__.py +1 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +26 -6
- msprobe/mindspore/api_accuracy_checker/api_runner.py +54 -16
- msprobe/mindspore/api_accuracy_checker/compute_element.py +47 -1
- msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +129 -0
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
- msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
- msprobe/mindspore/common/utils.py +20 -2
- msprobe/mindspore/debugger/debugger_config.py +25 -2
- msprobe/mindspore/debugger/precision_debugger.py +25 -6
- msprobe/mindspore/dump/hook_cell/api_registry.py +2 -0
- msprobe/mindspore/dump/jit_dump.py +7 -6
- msprobe/mindspore/monitor/anomaly_detect.py +404 -0
- msprobe/mindspore/monitor/distributed/__init__.py +0 -0
- msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
- msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
- msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
- msprobe/mindspore/monitor/features.py +63 -0
- msprobe/mindspore/monitor/module_hook.py +821 -0
- msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
- msprobe/mindspore/monitor/utils.py +267 -0
- msprobe/mindspore/ms_config.py +8 -2
- msprobe/mindspore/service.py +95 -21
- msprobe/pytorch/__init__.py +0 -1
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +1 -1
- msprobe/pytorch/bench_functions/apply_adam.py +215 -0
- msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
- msprobe/pytorch/bench_functions/mish.py +21 -0
- msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +44 -0
- msprobe/pytorch/bench_functions/sort_v2.py +21 -0
- msprobe/pytorch/common/utils.py +71 -0
- msprobe/pytorch/debugger/debugger_config.py +19 -9
- msprobe/pytorch/debugger/precision_debugger.py +14 -0
- msprobe/pytorch/dump/module_dump/module_processer.py +10 -30
- msprobe/pytorch/function_factory.py +7 -1
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +2 -1
- msprobe/pytorch/hook_module/wrap_distributed.py +4 -0
- msprobe/pytorch/monitor/anomaly_detect.py +14 -29
- msprobe/pytorch/monitor/csv2tb.py +10 -12
- msprobe/pytorch/monitor/module_hook.py +123 -104
- msprobe/pytorch/monitor/module_metric.py +6 -6
- msprobe/pytorch/monitor/optimizer_collect.py +45 -63
- msprobe/pytorch/monitor/utils.py +8 -43
- msprobe/pytorch/pt_config.py +19 -22
- msprobe/pytorch/service.py +103 -24
- msprobe/visualization/builder/graph_builder.py +31 -5
- msprobe/visualization/builder/msprobe_adapter.py +7 -5
- msprobe/visualization/graph/base_node.py +3 -2
- msprobe/visualization/graph/distributed_analyzer.py +80 -3
- msprobe/visualization/graph/node_op.py +4 -2
- msprobe/visualization/graph_service.py +3 -4
- msprobe/visualization/utils.py +10 -2
- {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/WHEEL +0 -0
- {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.2.2.dist-info}/top_level.txt +0 -0
msprobe/pytorch/bench_functions/apply_adam.py
ADDED

```diff
@@ -0,0 +1,215 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import namedtuple
+import torch
+
+
+VarParams = namedtuple('VarParams', ['var', 'lr_t', 'm_t', 'beta1_broad', 'grad', 'epsilon', 'v_t'])
+
+
+def _output_m_compute(m, beta1_broad, grad):
+    """
+    _output_m_compute
+    do compute m_t = m + (beta1 - 1) * (m - grad)
+    """
+    input_dtype = m.dtype
+
+    sneg_one = torch.ones((1), dtype=input_dtype) * -1
+    sneg_one = sneg_one.to(beta1_broad.device)
+
+    # `formula; beta1 - 1`
+    vsub_beta1_1 = torch.add(beta1_broad, sneg_one)
+
+    # `formula; m - grad`
+    vsub_m_grad = torch.sub(m, grad)
+
+    # `formula; (beta1 - 1) * (m - grad)`
+    vmul_m = torch.mul(vsub_beta1_1, vsub_m_grad)
+
+    # `formula; m_t = m + (beta1 - 1) * (m - grad)`
+    m_t = torch.add(m, vmul_m)
+
+    return m_t
+
+
+def _output_v_compute(v, beta2, grad):
+    """
+    _output_v_compute
+    do compute v_t = v + (1 - beta2) * (grad * grad - v)
+    """
+    input_dtype = v.dtype
+
+    sneg_one = torch.ones((1), dtype=input_dtype) * -1
+
+    # `formula; broadcast beta2 to vector`
+    beta2_tensor = torch.tensor(beta2, dtype=input_dtype)
+    beta2_broad = beta2_tensor.expand_as(v)
+
+    # `formula; beta2 - 1`
+    vsub_beta2_1 = torch.add(beta2_broad, sneg_one)
+    vsub_beta2_1 = vsub_beta2_1.to(v.device)
+
+    # `formula; grad * grad`
+    vmul_grad_grad = torch.mul(grad, grad)
+
+    # `formula; (v - grad * grad)`
+    vsub_v_grad = torch.sub(v, vmul_grad_grad)
+
+    # `formula; (beta2 - 1) * (v - grad * grad)`
+    vmul_grad = torch.mul(vsub_beta2_1, vsub_v_grad)
+
+    # `formula; v_t = v + (beta2 - 1) * (v - grad * grad)`
+    v_t = torch.add(v, vmul_grad)
+
+    return v_t
+
+
+def _inner_lr_compute(lr, beta2_power, beta1_power, compute_shape_tensor):
+    """
+    _inner_lr_compute
+    `formula; lr_t = learning_rate * (sqrt(1 - beta2_power)) / (1 - beta1_power)`
+    """
+
+    input_dtype = compute_shape_tensor.dtype
+
+    s_one = torch.ones((1), dtype=input_dtype)
+
+    s_neg_one = torch.ones((1), dtype=input_dtype) * -1
+
+    # `formula; (1 - beta2_power)`
+    v_neg_beta2_power = torch.mul(beta2_power, s_neg_one)
+    v_add_beta2_power = torch.add(v_neg_beta2_power, s_one)
+
+    # `formula; sqrt(1 - beta2_power)`
+    v_sqrt_beta2_power = torch.sqrt(v_add_beta2_power)
+
+    # `formula; (1 - beta1_power)`
+    v_neg_beta1_power = torch.mul(beta1_power, s_neg_one)
+    v_add_beta1_power = torch.add(v_neg_beta1_power, s_one)
+
+    # `formula; learning_rate * (sqrt(1 - beta2_power))`
+    res = torch.mul(lr, v_sqrt_beta2_power)
+
+    # `formula; learning_rate * (sqrt(1 - beta2_power)) / (1 - beta1_power)`
+    res = torch.div(res, v_add_beta1_power)
+    return res.expand_as(compute_shape_tensor)
+
+
+def _inner_eps_add_sqrt_vt_compute(epsilon, v_t):
+    """
+    (epsilon + sqrt(v_t))
+    """
+    # `formula; sqrt(v_t)`
+    sqrt_vt = torch.sqrt(v_t)
+
+    # `formula; broadcast epsilon to vector`
+    input_dtype = v_t.dtype
+    epsilon_tensor = torch.tensor(epsilon, dtype=input_dtype)
+    epsilon_broad = epsilon_tensor.expand_as(v_t)
+    epsilon_broad = epsilon_broad.to(sqrt_vt.device)
+
+    # `formula; epsilon + sqrt(v_t)`
+    v_add_sqrt_v = torch.add(sqrt_vt, epsilon_broad)
+
+    return v_add_sqrt_v
+
+
+def _output_var_t_compute_use_nesterov(varparams):
+    """
+    _output_var_t_compute_use_nesterov
+    `formula; var_t = var - lr_t * (m_t * beta1 + (1 - beta1) * grad) / (epsilon + sqrt(v_t))`
+    `formula; var_t = var - lr_t * (m_t * beta1 + (1 - beta1) * grad) / (epsilon + sqrt(v_t))`
+    """
+    var = varparams.var
+    lr_t = varparams.lr_t
+    m_t = varparams.m_t
+    beta1_broad = varparams.beta1_broad
+    grad = varparams.grad
+    epsilon = varparams.epsilon
+    v_t = varparams.v_t
+
+    input_dtype = var.dtype
+
+    s_one = torch.ones((1), dtype=input_dtype)
+
+    s_neg_one = torch.ones((1), dtype=input_dtype) * -1
+
+    # `formula; m_t * beta1`
+    v_muls_mt_beta1 = torch.mul(m_t, beta1_broad)
+
+    # `formula; 1 - beta1`
+    v_neg_beta1 = torch.mul(beta1_broad, s_neg_one)
+    vsub_1_beta1 = torch.add(v_neg_beta1, s_one)
+
+    # `formula; (1 - beta1) * grad`
+    v_mul_grad = torch.mul(vsub_1_beta1, grad)
+
+    # `formula; (m_t * beta1 + (1 - beta1) * grad)`
+    v_div_left = torch.add(v_muls_mt_beta1, v_mul_grad)
+
+    # `formula; lr_t * (m_t * beta1 + (1 - beta1) * grad)`
+    # broadcast lr_t to vector
+
+    lrt_broad = lr_t.expand_as(var)
+    v_mul_left = torch.mul(lrt_broad, v_div_left)
+
+    # `formula; (epsilon + sqrt(v_t))`
+    v_add_sqrt_v = _inner_eps_add_sqrt_vt_compute(epsilon, v_t)
+
+    # `formula; lr_t * (m_t * beta1 + (1 - beta1) * grad) / (epsilon + sqrt(v_t))`
+    v_div_res = torch.div(v_mul_left, v_add_sqrt_v)
+
+    # `formula; var - lr_t * (m_t * beta1 + (1 - beta1) * grad) / (epsilon + sqrt(v_t))`
+    v_t = torch.sub(var, v_div_res)
+
+    return v_t
+
+
+def _output_var_t_compute(var, lr_t, m_t, epsilon, v_t):
+    """
+    _output_var_t_compute
+    `var_t = var - lr_t * m_t / (epsilon + sqrt(v_t))`
+    """
+    # `formula; lr_t * m_t`
+    lr_t = lr_t.to(m_t.device)
+    v_mul_left = torch.mul(lr_t, m_t)
+
+    # `formula; (epsilon + sqrt(v_t))`
+    v_add_sqrt_v = _inner_eps_add_sqrt_vt_compute(epsilon, v_t)
+
+    # `formula; lr_t * m_t / (epsilon + sqrt(v_t))`
+    v_div_res = torch.div(v_mul_left, v_add_sqrt_v)
+
+    # `formula; var - lr_t * m_t / (epsilon + sqrt(v_t))`
+    v_t = torch.sub(var, v_div_res)
+
+    return v_t
+
+
+def npu_apply_adam(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, use_locking, use_nesterov, out):
+    var, m, v = out
+    input_dtype = m.dtype
+    beta1_tensor = torch.tensor(beta1, dtype=input_dtype).to(m.device)
+    beta1_broad = beta1_tensor.expand_as(m)
+    m_t = _output_m_compute(m, beta1_broad, grad)
+    v_t = _output_v_compute(v, beta2, grad)
+    lr_t = _inner_lr_compute(lr, beta2_power, beta1_power, grad)
+    if use_nesterov:
+        var_params = VarParams(var, lr_t, m_t, beta1_broad, grad, epsilon, v_t)
+        var_t = _output_var_t_compute_use_nesterov(var_params)
+    else:
+        var_t = _output_var_t_compute(var, lr_t, m_t, epsilon, v_t)
+    return var_t, m_t, v_t
```
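The new `apply_adam.py` provides a pure-PyTorch CPU reference for the fused `npu_apply_adam` operator so it can serve as a comparison baseline. Below is a minimal sketch of how the reference could be driven; the shapes and hyper-parameters are illustrative assumptions, not taken from the package's own tests:

```python
# Hedged sketch: exercise the CPU reference with made-up Adam state.
import torch
from msprobe.pytorch.bench_functions.apply_adam import npu_apply_adam

var, m, v = torch.zeros(4), torch.zeros(4), torch.zeros(4)  # parameter, 1st/2nd moments
grad = torch.randn(4)
beta1, beta2, step = 0.9, 0.999, 1

var_t, m_t, v_t = npu_apply_adam(
    beta1_power=torch.tensor(beta1 ** step),  # scalar tensors: the helpers call torch.mul on them
    beta2_power=torch.tensor(beta2 ** step),
    lr=torch.tensor(1e-3),
    beta1=beta1, beta2=beta2, epsilon=1e-8,
    grad=grad, use_locking=False, use_nesterov=False,
    out=(var, m, v),
)
```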
msprobe/pytorch/bench_functions/group_norm_silu.py
ADDED

```diff
@@ -0,0 +1,27 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+
+def npu_group_norm_silu(x, gama, beta, group, eps):
+    if len(x.shape) != 4:
+        raise ValueError("x shape should be (N, C, H, W)")
+    res = torch.ops.aten.native_group_norm(x, gama, beta, x.shape[0], x.shape[1], x.shape[2] * x.shape[3], group, eps)
+    res = list(res)
+    if not res:
+        raise ValueError("run native_group_norm failed")
+    res[0] = torch.nn.functional.silu(res[0])
+    return res
```
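`npu_group_norm_silu` composes `aten.native_group_norm` with a SiLU activation, returning the op's three outputs (`out`, `mean`, `rstd`) with SiLU applied to `out`. An illustrative call, shapes assumed:

```python
import torch
from msprobe.pytorch.bench_functions.group_norm_silu import npu_group_norm_silu

x = torch.randn(2, 8, 4, 4)                  # the reference insists on (N, C, H, W)
gamma, beta = torch.ones(8), torch.zeros(8)  # per-channel affine parameters
out, mean, rstd = npu_group_norm_silu(x, gamma, beta, 4, 1e-5)  # 4 groups, eps=1e-5
```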
msprobe/pytorch/bench_functions/mish.py
ADDED

```diff
@@ -0,0 +1,21 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+
+def npu_mish(x):
+    mish = torch.nn.Mish()
+    return mish(x)
```
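`torch.nn.Mish` computes `x * tanh(softplus(x))`, so the reference matches the closed form directly; a quick sanity check with an assumed input:

```python
import torch
from msprobe.pytorch.bench_functions.mish import npu_mish

x = torch.randn(16)
expected = x * torch.tanh(torch.nn.functional.softplus(x))
assert torch.allclose(npu_mish(x), expected, atol=1e-6)
```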
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py
ADDED

```diff
@@ -0,0 +1,44 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import numpy as np
+
+
+def softmax_func(x, axis=None):
+    x = x.float()
+    x_max = x.max(dim=axis, keepdims=True).values
+    x_sub = x - x_max
+    y = torch.exp(x_sub)
+    x_sum = y.sum(dim=axis, keepdims=True)
+    ans = 0 if (x_sum == 0).any() else y / x_sum
+    return ans
+
+
+def npu_moe_gating_top_k_softmax(x, finished_optional, k):
+    input_dtype = x.dtype
+    num_expert = x.shape[-1]
+    softmax = softmax_func(x, -1)
+    softmax = softmax.to(input_dtype)
+    expert_idx = torch.argsort(-softmax, dim=-1, stable=True)
+    expert_idx = expert_idx[:, :k]
+    y = torch.gather(softmax, index=expert_idx, dim=-1)
+    if finished_optional is not None:
+        finished_optional = finished_optional.view(finished_optional.shape[0], 1)
+        finished_optional = finished_optional.expand(-1, k)
+        expert_idx = torch.where(finished_optional, num_expert, expert_idx)
+    row_idx = torch.arange(y.shape[0] * y.shape[1]).reshape(y.shape[1], y.shape[0]).t()
+
+    return y, expert_idx, row_idx
```
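The reference returns the top-k gate values `y`, the chosen `expert_idx` (rows flagged in `finished_optional` are redirected to the out-of-range dummy index `num_expert`), and a column-major `row_idx`. An illustrative call with assumed shapes:

```python
import torch
from msprobe.pytorch.bench_functions.moe_gating_top_k_softmax import npu_moe_gating_top_k_softmax

logits = torch.randn(8, 16)                  # (num_rows, num_expert) gating logits
finished = torch.zeros(8, dtype=torch.bool)  # no row is finished yet
y, expert_idx, row_idx = npu_moe_gating_top_k_softmax(logits, finished, 2)
print(y.shape, expert_idx.shape, row_idx.shape)  # each torch.Size([8, 2])
```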
msprobe/pytorch/bench_functions/sort_v2.py
ADDED

```diff
@@ -0,0 +1,21 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+
+def npu_sort_v2(x, dim=-1, descending=False, out=None):
+    y, _ = torch.sort(x, dim=dim, descending=descending)
+    return y
```
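`npu_sort_v2` is a values-only wrapper around `torch.sort`; the `out` parameter exists for signature parity with the NPU op and is ignored. For example:

```python
import torch
from msprobe.pytorch.bench_functions.sort_v2 import npu_sort_v2

print(npu_sort_v2(torch.tensor([3., 1., 2.]), descending=True))  # tensor([3., 2., 1.])
```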
msprobe/pytorch/common/utils.py
CHANGED
```diff
@@ -18,6 +18,7 @@ import os
 import pickle
 import random
 import stat
+import inspect
 from functools import wraps
 
 import numpy as np
@@ -402,3 +403,73 @@ def load_api_data(api_data_bytes):
     except Exception as e:
         raise RuntimeError(f"load api_data from bytes failed") from e
     return buffer
+
+
+def is_recomputation():
+    """Check if the current operation is in the re-computation phase.
+
+    This function inspects the current call stack to indicate whether the current operation is in the
+    re-computation phase. We use a blacklist mechanism; the Megatron and MindSpeed frameworks are currently supported.
+    megatron: the 'backward' function is called by the 'torch/autograd/function.py' file.
+    mindspeed: the 'checkpoint_function_backward' function is called by the 'torch/autograd/function.py'
+    file, or a custom module (using CheckpointWithoutOutput) whose 'recompute_fn' function is executed within the
+    'torch/utils/checkpoint.py' file.
+
+    Returns:
+        bool: True if in the re-computation phase, False otherwise.
+    """
+    backward_function_indices = []
+    call_stack = inspect.stack()
+
+    # Identify whether 'recompute_fn' is being executed within the 'torch/utils/checkpoint.py' file
+    for frame_info in call_stack:
+        if frame_info.function == "recompute_fn" and frame_info.filename.endswith('torch/utils/checkpoint.py'):
+            del call_stack
+            return True
+
+    # Identify indices in the call stack where the specific function is being executed
+    for idx, frame_info in enumerate(call_stack):
+        if frame_info.function == Const.BACKWARD or frame_info.function == 'checkpoint_function_backward':
+            backward_function_indices.append(idx)
+
+    # Check if the execution is within the 'torch/autograd/function.py' file
+    for idx in backward_function_indices:
+        # The Megatron and MindSpeed L0&L1 scenes
+        if idx + 1 < len(call_stack) and call_stack[idx + 1].filename.endswith('torch/autograd/function.py'):
+            del call_stack
+            return True
+        # The latest MindSpeed L2 and ModelLink scenes
+        if idx + 2 < len(call_stack) and call_stack[idx + 2].filename.endswith('torch/autograd/function.py'):
+            del call_stack
+            return True
+
+    del call_stack
+    return False
+
+
+def check_save_param(variable, name, save_backward):
+    # try catch this api to skip invalid call
+    if not isinstance(variable, (list, dict, torch.Tensor, int, float, str)):
+        logger.warning("PrecisionDebugger.save variable type not valid, "
+                       "should be one of list, dict, torch.Tensor, int, float or string. "
+                       "Skip current save process.")
+        raise ValueError
+    if not isinstance(name, str):
+        logger.warning("PrecisionDebugger.save name not valid, "
+                       "should be string. "
+                       "Skip current save process.")
+        raise ValueError
+    if not isinstance(save_backward, bool):
+        logger.warning("PrecisionDebugger.save_backward name not valid, "
+                       "should be bool. "
+                       "Skip current save process.")
+        raise ValueError
+
+
+def replace_last_occurrence(text, old, new):
+    if text is None:
+        return text
+    index = text.rfind(old)
+    if index != -1:
+        return text[:index] + text[index:].replace(old, new, 1)
+    return text
```
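`replace_last_occurrence` swaps only the final match of `old`, which is what the module-node bookkeeping in `module_processer.py` below relies on when mapping a backward hook name back to its forward counterpart. A quick illustration with a hypothetical name:

```python
# Only the last 'backward' is rewritten; earlier matches are left alone.
replace_last_occurrence("Module.backward_block.Block.backward", "backward", "forward")
# -> 'Module.backward_block.Block.forward'
```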
msprobe/pytorch/debugger/debugger_config.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -26,7 +26,7 @@ class DebuggerConfig:
         self.task = task or common_config.task or Const.STATISTICS
         self.rank = common_config.rank if common_config.rank else []
         self.step = common_config.step if common_config.step else []
-        self.level = level or common_config.level or
+        self.level = level or common_config.level or Const.LEVEL_L1
         self.enable_dataloader = common_config.enable_dataloader
         self.scope = task_config.scope if task_config.scope else []
         self.list = task_config.list if task_config.list else []
@@ -36,10 +36,6 @@ class DebuggerConfig:
         self.framework = Const.PT_FRAMEWORK
         self.async_dump = common_config.async_dump if common_config.async_dump else False
 
-        if self.level == Const.LEVEL_L2:
-            self.is_backward_kernel_dump = False
-            self._check_and_adjust_config_with_l2()
-
         if self.task == Const.FREE_BENCHMARK:
             self.fuzz_device = task_config.fuzz_device
             self.handler_type = task_config.handler_type
@@ -65,6 +61,10 @@ class DebuggerConfig:
 
         self.check()
 
+        if self.level == Const.LEVEL_L2:
+            self.is_backward_kernel_dump = False
+            self._check_and_adjust_config_with_l2()
+
     def check_kwargs(self):
         if self.task and self.task not in Const.TASK_LIST:
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
@@ -78,6 +78,16 @@ class DebuggerConfig:
         if not isinstance(self.async_dump, bool):
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
                                    f"The parameters async_dump should be bool.")
+        if self.async_dump and self.task == Const.TENSOR and not self.list:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The parameters async_dump is true in tensor task, the parameters list cannot be "
+                                   f"empty.")
+        if self.task == Const.STRUCTURE and self.level not in [Const.LEVEL_L0, Const.LEVEL_MIX]:
+            logger.warning_on_rank_0(
+                f"When the task is set to structure, the level should be one of {[Const.LEVEL_L0, Const.LEVEL_MIX]}. "
+                f"If not, the default level is {Const.LEVEL_MIX}."
+            )
+            self.level = Const.LEVEL_MIX
 
     def check(self):
         self.check_kwargs()
@@ -93,10 +103,10 @@ class DebuggerConfig:
             logger.error_on_rank_0(
                 f"For level {self.level}, PrecisionDebugger or start interface must receive a 'model' parameter.")
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, f"missing the parameter 'model'")
-
+
         instance.model = start_model if start_model is not None else instance.model
         if isinstance(instance.model, torch.nn.Module):
-            return
+            return
 
         error_model = None
         if isinstance(instance.model, (list, tuple)):
@@ -108,7 +118,7 @@ class DebuggerConfig:
             error_model = instance.model
 
         if error_model is not None:
-            error_info = (f"The 'model' parameter must be a torch.nn.
+            error_info = (f"The 'model' parameter must be a torch.nn.Module or list[torch.nn.Module] "
                           f"type, currently there is a {type(error_model)} type.")
             raise MsprobeException(
                 MsprobeException.INVALID_PARAM_ERROR, error_info)
```
msprobe/pytorch/debugger/precision_debugger.py
CHANGED

```diff
@@ -21,6 +21,7 @@ from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.file_utils import FileChecker
 from msprobe.core.common.utils import get_real_step_or_rank
 from msprobe.pytorch.common.log import logger
+from msprobe.pytorch.common.utils import check_save_param
 from msprobe.pytorch.debugger.debugger_config import DebuggerConfig
 from msprobe.pytorch.dump.module_dump.module_dump import ModuleDumper
 from msprobe.pytorch.grad_probe.grad_monitor import GradientMonitor
@@ -158,6 +159,19 @@ class PrecisionDebugger:
             return
         cls._instance.gm.monitor(model)
 
+    @classmethod
+    def save(cls, variable, name, save_backward=True):
+        instance = cls._instance
+        if not instance:
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
+        if instance.task not in [Const.TENSOR, Const.STATISTICS] or instance.config.level != Const.LEVEL_DEBUG:
+            return
+        try:
+            check_save_param(variable, name, save_backward)
+        except ValueError:
+            return
+        instance.service.save(variable, name, save_backward)
+
 
 def module_dump(module, dump_name):
     if not isinstance(module, torch.nn.Module):
```
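The new `save` classmethod is a no-op unless the task is `statistics` or `tensor` and the level is `debug`, and it silently skips calls that fail `check_save_param`. A hedged sketch of the intended call pattern; the config path and training loop are placeholders, not code from the package:

```python
from msprobe.pytorch import PrecisionDebugger

debugger = PrecisionDebugger(config_path="./config.json")  # config assumed to set "level": "debug"
for batch in loader:                                       # hypothetical training loop
    loss = model(batch).mean()
    PrecisionDebugger.save(loss, "loss", save_backward=True)
```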
msprobe/pytorch/dump/module_dump/module_processer.py
CHANGED

```diff
@@ -19,6 +19,7 @@ import torch
 from msprobe.core.common.const import Const
 from msprobe.core.data_dump.scope import BaseScope, ModuleRangeScope, MixRangeScope
 from msprobe.pytorch.common.log import logger
+from msprobe.pytorch.common.utils import replace_last_occurrence
 from torch.utils.checkpoint import checkpoint as origin_checkpoint
 from torch.utils.checkpoint import set_checkpoint_early_stop
 from torch.utils.hooks import BackwardHook
@@ -45,29 +46,8 @@ class ModuleProcesser:
         self.scope = scope if isinstance(scope, (ModuleRangeScope, MixRangeScope)) else None
         BackwardHook.setup_input_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_input_hook)
         BackwardHook.setup_output_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_output_hook)
-        BackwardHook.setup_output_hook = ModuleProcesser.filter_tensor_and_tuple(BackwardHook.setup_output_hook)
         replace_checkpoint()
 
-    @staticmethod
-    def filter_tensor_and_tuple(func):
-        @wraps(func)
-        def wrap_by_filter_tensor_and_tuple(*args, **kwargs):
-            # setup_output_hook may receive non-tensor data, which makes later dumps fail; the workaround is to walk the non-tensor object's attributes and hook its tensor attributes
-            # setup_output_hook is defined as setup_output_hook(self, args), so handle the second positional argument, i.e. args[1]
-            if not isinstance(args[1], (torch.Tensor, tuple)):
-                for item_str in dir(args[1]):
-                    item = getattr(args[1], item_str)
-                    # handle a tensor, or a tuple that contains only tensors
-                    if isinstance(item, torch.Tensor) or \
-                            (isinstance(item, tuple) and all(isinstance(x, torch.Tensor) for x in item)):
-                        args_new = (args[0], item)
-                        result = func(*args_new, **kwargs)
-                        setattr(args[1], item_str, result)
-                return args[1]
-            return func(*args, **kwargs)
-
-        return wrap_by_filter_tensor_and_tuple
-
     @staticmethod
     def clone_return_value(func):
         @wraps(func)
@@ -81,11 +61,11 @@ class ModuleProcesser:
         def clone_if_tensor(result):
             if isinstance(result, torch.Tensor):
                 return result.clone()
-            elif
+            elif type(result) is tuple:
                 return tuple(ModuleProcesser.clone_if_tensor(x) for x in result)
-            elif
+            elif type(result) is list:
                 return list(ModuleProcesser.clone_if_tensor(x) for x in result)
-            elif
+            elif type(result) is dict:
                 return {k: ModuleProcesser.clone_if_tensor(v) for k, v in result.items()}
             else:
                 return result
@@ -103,7 +83,7 @@ class ModuleProcesser:
         return hasattr(module, '_backward_hooks') and \
             len(module._backward_hooks) > 0 and \
             module._is_full_backward_hook is False
-
+
     @staticmethod
     def get_modules_and_names(models):
         modules_and_names_with_index = {}
@@ -130,8 +110,8 @@ class ModuleProcesser:
             if module == model:
                 continue
             module_index = (index + Const.SEP) if index != "-1" else ""
-            prefix_name = (BaseScope.Module_Type_Module + Const.SEP + module_index +
-
+            prefix_name = (BaseScope.Module_Type_Module + Const.SEP + module_index +
+                           name + Const.SEP + module.__class__.__name__ + Const.SEP)
             pre_forward_hook, forward_hook, backward_hook, forward_hook_torch_version_below_2 = build_hook(
                 BaseScope.Module_Type_Module,
                 prefix_name
@@ -203,9 +183,9 @@ class ModuleProcesser:
         if not hasattr(module, "mindstudio_reserved_name") or not module.mindstudio_reserved_name:
             module.mindstudio_reserved_name = []
         module.mindstudio_reserved_name.append(full_name)
-        forward_full_name = full_name
-        ModuleProcesser.module_node[full_name] =
-        Const.FORWARD, Const.BACKWARD)
+        forward_full_name = replace_last_occurrence(full_name, Const.BACKWARD, Const.FORWARD)
+        ModuleProcesser.module_node[full_name] = replace_last_occurrence(
+            ModuleProcesser.module_node.get(forward_full_name), Const.FORWARD, Const.BACKWARD)
         ModuleProcesser.api_parent_node = None
         if self.scope:
             self.scope.begin_module(full_name)
```
msprobe/pytorch/function_factory.py
CHANGED

```diff
@@ -27,6 +27,11 @@ from msprobe.pytorch.bench_functions.rotary_mul import npu_rotary_mul, npu_rotar
 from msprobe.pytorch.bench_functions.scaled_mask_softmax import npu_scaled_masked_softmax, \
     npu_scaled_masked_softmax_backward
 from msprobe.pytorch.bench_functions.swiglu import npu_swiglu, npu_swiglu_backward
+from msprobe.pytorch.bench_functions.apply_adam import npu_apply_adam
+from msprobe.pytorch.bench_functions.group_norm_silu import npu_group_norm_silu
+from msprobe.pytorch.bench_functions.mish import npu_mish
+from msprobe.pytorch.bench_functions.moe_gating_top_k_softmax import npu_moe_gating_top_k_softmax
+from msprobe.pytorch.bench_functions.sort_v2 import npu_sort_v2
 from msprobe.pytorch.common.utils import logger
 
 
@@ -79,7 +84,8 @@ class Register(dict):
 npu_custom_functions = Register()
 npu_custom_functions([
     npu_apply_adam_w, npu_confusion_transpose, npu_fast_gelu, npu_layer_norm_eval, npu_linear, npu_fusion_attention,
-    npu_rms_norm, npu_rotary_mul, npu_scaled_masked_softmax, npu_swiglu, gpu_fusion_attention
+    npu_rms_norm, npu_rotary_mul, npu_scaled_masked_softmax, npu_swiglu, gpu_fusion_attention, npu_apply_adam,
+    npu_group_norm_silu, npu_mish, npu_moe_gating_top_k_softmax, npu_sort_v2
 ])
 
 # register for npu custom backward bench functions
```
msprobe/pytorch/hook_module/wrap_distributed.py
CHANGED

```diff
@@ -57,6 +57,10 @@ class DistributedOPTemplate(HOOKModule):
         if kwargs.get("async_op") or self.op_name_ in ["isend", "irecv"]:
             if handle and hasattr(handle, 'wait'):
                 handle.wait()
+            if self.op_name_ == "batch_isend_irecv":
+                if isinstance(handle, list):
+                    for req in handle:
+                        req.wait()
         return handle
 
 
```
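The extra branch exists because `torch.distributed.batch_isend_irecv` returns a list of work handles (one per `P2POp`) rather than a single handle exposing `wait()`, so the hook must wait on each element. A hedged sketch of the pattern being intercepted, assuming an initialized process group and a valid `peer` rank:

```python
import torch
import torch.distributed as dist

def exchange_with(peer: int) -> torch.Tensor:
    send_buf, recv_buf = torch.ones(4), torch.empty(4)
    ops = [dist.P2POp(dist.isend, send_buf, peer),
           dist.P2POp(dist.irecv, recv_buf, peer)]
    for req in dist.batch_isend_irecv(ops):  # list of work objects, one per op
        req.wait()
    return recv_buf
```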