PyPI - mindstudio-probe - Versions diffs - 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (177) hide show

{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/METADATA +3 -3
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/RECORD +168 -150
msprobe/README.md +27 -22
msprobe/core/common/const.py +129 -60
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +3 -1
msprobe/core/common/file_utils.py +25 -2
msprobe/core/common/inplace_ops.yaml +1 -0
msprobe/core/common/utils.py +43 -33
msprobe/core/compare/acc_compare.py +43 -74
msprobe/core/compare/check.py +2 -6
msprobe/core/compare/highlight.py +2 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +1 -1
msprobe/core/compare/layer_mapping/layer_mapping.py +2 -1
msprobe/core/compare/merge_result/merge_result.py +16 -9
msprobe/core/compare/merge_result/utils.py +81 -0
msprobe/core/compare/multiprocessing_compute.py +19 -12
msprobe/core/compare/npy_compare.py +30 -12
msprobe/core/compare/utils.py +30 -10
msprobe/core/data_dump/api_registry.py +176 -0
msprobe/core/data_dump/data_collector.py +58 -13
msprobe/core/data_dump/data_processor/base.py +94 -10
msprobe/core/data_dump/data_processor/factory.py +3 -0
msprobe/core/data_dump/data_processor/mindspore_processor.py +33 -33
msprobe/core/data_dump/data_processor/pytorch_processor.py +99 -18
msprobe/core/data_dump/json_writer.py +61 -40
msprobe/core/grad_probe/constant.py +1 -0
msprobe/core/grad_probe/grad_compare.py +1 -1
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/docs/01.installation.md +27 -1
msprobe/docs/02.config_introduction.md +27 -23
msprobe/docs/03.config_examples.md +24 -0
msprobe/docs/05.data_dump_PyTorch.md +103 -16
msprobe/docs/06.data_dump_MindSpore.md +76 -32
msprobe/docs/07.accuracy_checker_PyTorch.md +11 -1
msprobe/docs/08.accuracy_checker_online_PyTorch.md +3 -1
msprobe/docs/09.accuracy_checker_MindSpore.md +5 -3
msprobe/docs/10.accuracy_compare_PyTorch.md +59 -33
msprobe/docs/11.accuracy_compare_MindSpore.md +40 -16
msprobe/docs/12.overflow_check_PyTorch.md +3 -1
msprobe/docs/13.overflow_check_MindSpore.md +4 -2
msprobe/docs/14.data_parse_PyTorch.md +1 -7
msprobe/docs/18.online_dispatch.md +1 -1
msprobe/docs/19.monitor.md +332 -273
msprobe/docs/21.visualization_PyTorch.md +42 -13
msprobe/docs/22.visualization_MindSpore.md +43 -13
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/27.dump_json_instruction.md +301 -27
msprobe/docs/28.debugger_save_instruction.md +94 -0
msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/monitor/step_count_per_record.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +4 -2
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +32 -7
msprobe/mindspore/api_accuracy_checker/api_runner.py +70 -22
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +602 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/compute_element.py +47 -1
msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -1
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +2 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +130 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
msprobe/mindspore/common/const.py +61 -0
msprobe/mindspore/common/utils.py +48 -18
msprobe/mindspore/compare/ms_compare.py +27 -19
msprobe/mindspore/compare/ms_graph_compare.py +6 -5
msprobe/mindspore/debugger/debugger_config.py +31 -6
msprobe/mindspore/debugger/precision_debugger.py +45 -14
msprobe/mindspore/dump/dump_tool_factory.py +5 -3
msprobe/mindspore/dump/hook_cell/api_register.py +142 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +9 -10
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +24 -26
msprobe/mindspore/dump/jit_dump.py +21 -15
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +22 -56
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -1
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +10 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +2 -0
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/monitor/anomaly_detect.py +404 -0
msprobe/mindspore/monitor/distributed/__init__.py +0 -0
msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
msprobe/mindspore/monitor/features.py +63 -0
msprobe/mindspore/monitor/module_hook.py +873 -0
msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
msprobe/mindspore/monitor/utils.py +309 -0
msprobe/mindspore/ms_config.py +8 -2
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/service.py +114 -34
msprobe/pytorch/__init__.py +0 -1
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +12 -7
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +2 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +4 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +5 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +25 -6
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -19
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/bench_functions/apply_adam.py +215 -0
msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
msprobe/pytorch/{parse.py → bench_functions/mish.py} +6 -4
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +50 -0
msprobe/pytorch/bench_functions/sort_v2.py +21 -0
msprobe/pytorch/common/utils.py +97 -4
msprobe/pytorch/debugger/debugger_config.py +19 -9
msprobe/pytorch/debugger/precision_debugger.py +24 -1
msprobe/pytorch/dump/module_dump/module_dump.py +4 -3
msprobe/pytorch/dump/module_dump/module_processer.py +21 -35
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +8 -2
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/hook_module/api_register.py +131 -0
msprobe/pytorch/hook_module/hook_module.py +19 -14
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +173 -75
msprobe/pytorch/monitor/anomaly_detect.py +14 -29
msprobe/pytorch/monitor/csv2tb.py +18 -14
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +238 -193
msprobe/pytorch/monitor/module_metric.py +9 -6
msprobe/pytorch/monitor/optimizer_collect.py +100 -67
msprobe/pytorch/monitor/unittest/test_monitor.py +1 -1
msprobe/pytorch/monitor/utils.py +76 -44
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +9 -0
msprobe/pytorch/online_dispatch/dump_compare.py +3 -0
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +2 -1
msprobe/pytorch/pt_config.py +30 -29
msprobe/pytorch/service.py +114 -32
msprobe/visualization/builder/graph_builder.py +75 -10
msprobe/visualization/builder/msprobe_adapter.py +7 -6
msprobe/visualization/compare/graph_comparator.py +42 -38
msprobe/visualization/compare/mode_adapter.py +0 -19
msprobe/visualization/graph/base_node.py +11 -3
msprobe/visualization/graph/distributed_analyzer.py +71 -3
msprobe/visualization/graph/graph.py +0 -11
msprobe/visualization/graph/node_op.py +4 -3
msprobe/visualization/graph_service.py +4 -5
msprobe/visualization/utils.py +12 -35
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -205
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -75
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/top_level.txt +0 -0

msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py CHANGED Viewed

@@ -1,9 +1,7 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
-# Licensed under the Apache License, Version 2.0  (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
@@ -18,8 +16,8 @@
 import os
 from collections import namedtuple
 import re
-import torch
+import torch
 try:
     import torch_npu
 except ImportError:
@@ -33,11 +31,9 @@ from msprobe.core.common.const import FileCheckConst, Const, CompareConst
 from msprobe.core.common.file_utils import FileChecker
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import CompareException
+from msprobe.pytorch.hook_module.api_register import ApiTemplate, get_api_register
 from msprobe.pytorch.hook_module.wrap_aten import AtenOPTemplate
-from msprobe.pytorch.hook_module.wrap_functional import FunctionalOPTemplate
-from msprobe.pytorch.hook_module.wrap_npu_custom import NpuOPTemplate
-from msprobe.pytorch.hook_module.wrap_tensor import TensorOPTemplate
-from msprobe.pytorch.hook_module.wrap_torch import TorchOPTemplate
 hf_32_standard_api = ["conv1d", "conv2d"]
 not_detach_set = {'resize_', 'resize_as_', 'set_', 'transpose_', 't_', 'squeeze_', 'unsqueeze_'}
@@ -108,17 +104,30 @@ def exec_api(exec_params):
     kwargs = exec_params.kwargs
     is_autocast = exec_params.is_autocast
     autocast_dtype = exec_params.autocast_dtype
-    if api_type == "Functional":
-        torch_api = FunctionalOPTemplate(api_name, str, False)
-    if api_type == "Tensor":
-        torch_api = TensorOPTemplate(api_name, str, False)
-    if api_type == "Torch":
-        torch_api = TorchOPTemplate(api_name, str, False)
-    if api_type == "Aten":
+    out = None
+    prefix_map = Const.API_DATA_PREFIX.get(Const.PT_FRAMEWORK, {})
+    if not prefix_map or api_type not in prefix_map.values() or \
+        api_type not in (
+            Const.FUNCTIONAL_API_TYPE_PREFIX,
+            Const.TENSOR_API_TYPE_PREFIX,
+            Const.TORCH_API_TYPE_PREFIX,
+            Const.ATEN_API_TYPE_PREFIX,
+            Const.NPU_API_TYPE_PREFIX
+    ):
+        return out
+    if api_type == Const.ATEN_API_TYPE_PREFIX:
         torch_api = AtenOPTemplate(api_name, None, False)
-    if api_type == "NPU":
-        torch_api = NpuOPTemplate(api_name, None, False, device)
+    else:
+        api_register = get_api_register()
+        api_register.initialize_hook(None)
+        api_func_type = list(prefix_map.keys())[list(prefix_map.values()).index(api_type)]
+        api_func = api_register.ori_api_attr.get(Const.PT_FRAMEWORK + Const.SEP + api_func_type, {}).get(api_name)
+        if api_func is None:
+            return out
+        torch_api = ApiTemplate(api_name, api_func, api_type, None, need_hook=False, device=device)
     if is_autocast:
         with autocast(dtype=autocast_dtype):
             out = torch_api.forward(*args, **kwargs)

msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py CHANGED Viewed

@@ -27,6 +27,7 @@ from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.client import T
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.server import TCPServer
 from msprobe.core.common.file_utils import remove_path
 from msprobe.pytorch.common.utils import logger, save_api_data, load_api_data, save_pkl, load_pkl
+from msprobe.core.common.decorator import recursion_depth_decorator
 BufferType = Union[ApiData, Dict[str, Any], str]  # Union[Tensor, Tuple[Optional[Tensor]]]
@@ -168,11 +169,12 @@ class ATTL:
         return buffer
+@recursion_depth_decorator("move2device_exec")
 def move2device_exec(obj, device):
     if isinstance(obj, (tuple, list)):
         data_list = [move2device_exec(val, device) for val in obj]
         return data_list if isinstance(obj, list) else tuple(data_list)
-    if isinstance(obj, dict):
+    if isinstance(obj, dict):
         return {key: move2device_exec(val, device) for key, val in obj.items()}
     elif isinstance(obj, torch.Tensor):
         obj = obj.detach()

msprobe/pytorch/bench_functions/apply_adam.py ADDED Viewed

@@ -0,0 +1,215 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from collections import namedtuple
+import torch
+VarParams = namedtuple('VarParams', ['var', 'lr_t', 'm_t', 'beta1_broad', 'grad', 'epsilon', 'v_t'])
+def _output_m_compute(m, beta1_broad, grad):
+    """
+    _output_m_compute
+    do compute m_t = m + (beta1 - 1) * (m - grad)
+    """
+    input_dtype = m.dtype
+    sneg_one = torch.ones((1), dtype=input_dtype) * -1
+    sneg_one = sneg_one.to(beta1_broad.device)
+    # `formula; beta1 -1`
+    vsub_beta1_1 = torch.add(beta1_broad, sneg_one)
+    # `formula; m - grad`
+    vsub_m_grad = torch.sub(m, grad)
+    # `formula; (beta1 - 1) * (m - grad)`
+    vmul_m = torch.mul(vsub_beta1_1, vsub_m_grad)
+    # `formula; m_t = m + (beta1 - 1) * (m - grad)`
+    m_t = torch.add(m, vmul_m)
+    return m_t
+def _output_v_compute(v, beta2, grad):
+    """
+    _output_v_compute
+    do compute v_t = v + (1 - beta2)*(grad*grad -v)
+    """
+    input_dtype = v.dtype
+    sneg_one = torch.ones((1), dtype=input_dtype) * -1
+    # `formula; broadcast beta2 to vector`
+    beta2_tensor = torch.tensor(beta2, dtype=input_dtype)
+    beta2_broad = beta2_tensor.expand_as(v)
+    # `formula; beta2 - 1`
+    vsub_beta2_1 = torch.add(beta2_broad, sneg_one)
+    vsub_beta2_1 = vsub_beta2_1.to(v.device)
+    # `formula; grad * grad`
+    vmul_grad_grad = torch.mul(grad, grad)
+    # `formula; (v - grad*grad)`
+    vsub_v_grad = torch.sub(v, vmul_grad_grad)
+    # `formula; (beta2 -1) * (v - grad * grad)`
+    vmul_grad = torch.mul(vsub_beta2_1, vsub_v_grad)
+    # `formula; v_t = v + (beta2 - 1) * (v - grad * grad)`
+    v_t = torch.add(v, vmul_grad)
+    return v_t
+def _inner_lr_compute(lr, beta2_power, beta1_power, compute_shape_tensor):
+    """
+    _inner_lr_compute
+    `formula; lr_t = learning_rate * (sqrt(1-beta2_power)) / (1 - beta1_power)`
+    """
+    input_dtype = compute_shape_tensor.dtype
+    s_one = torch.ones((1), dtype=input_dtype)
+    s_neg_one = torch.ones((1), dtype=input_dtype) * -1
+    # `formula; (1 - beta2_power)`
+    v_neg_beta2_power = torch.mul(beta2_power, s_neg_one)
+    v_add_beta2_power = torch.add(v_neg_beta2_power, s_one)
+    # `formula; sqrt(1 - beta2_power)`
+    v_sqrt_beta2_power = torch.sqrt(v_add_beta2_power)
+    # `formula; (1 - beta1_power)`
+    v_neg_beta1_power = torch.mul(beta1_power, s_neg_one)
+    v_add_beta1_power = torch.add(v_neg_beta1_power, s_one)
+    # `formula; learning_rate * sqrt(1-beta2_power)`
+    res = torch.mul(lr, v_sqrt_beta2_power)
+    # `formula; learning_rate*(sqrt(1-beta2_power))/(1-beta1_power)`
+    res = torch.div(res, v_add_beta1_power)
+    return res.expand_as(compute_shape_tensor)
+def _inner_eps_add_sqrt_vt_compute(epsilon, v_t):
+    """
+    (epsilon + sqrt(v_t) )
+    """
+    # `formula; sqrt(v_t)`
+    sqrt_vt = torch.sqrt(v_t)
+    # `formula; broadcast epsilon  to vector`
+    input_dtype = v_t.dtype
+    epsilon_tensor = torch.tensor(epsilon, dtype=input_dtype)
+    epsilon_broad = epsilon_tensor.expand_as(v_t)
+    epsilon_broad = epsilon_broad.to(sqrt_vt.device)
+    # `formula; epsilon + sqrt(v_t)`
+    v_add_sqrt_v = torch.add(sqrt_vt, epsilon_broad)
+    return v_add_sqrt_v
+def _output_var_t_compute_use_nesterov(varparams):
+    """
+    _output_var_t_compute_use_nesterov
+    `formula; var_t = var - lr_t * (m_t * beta1 + (1 - beta1) * grad) / (epsilon + sqrt(v_t))`
+    """
+    var = varparams.var
+    lr_t = varparams.lr_t
+    m_t = varparams.m_t
+    beta1_broad = varparams.beta1_broad
+    grad = varparams.grad
+    epsilon = varparams.epsilon
+    v_t = varparams.v_t
+    input_dtype = var.dtype
+    s_one = torch.ones((1), dtype=input_dtype)
+    s_neg_one = torch.ones((1), dtype=input_dtype) * -1
+    # `formula; m_t * beta1`
+    v_muls_mt_beta1 = torch.mul(m_t, beta1_broad)
+    # `formula; 1 -beta1`
+    v_neg_beta1 = torch.mul(beta1_broad, s_neg_one)
+    vsub_1_beta1 = torch.add(v_neg_beta1, s_one)
+    # `formula; (1-beta1)* grad`
+    v_mul_grad = torch.mul(vsub_1_beta1, grad)
+    # `formula; (m_t*beta1 + (1 - beta1)*grad)`
+    v_div_left = torch.add(v_muls_mt_beta1, v_mul_grad)
+    # `formula; lr_t * (m_t*beta1 + (1 - beta1) * grad)`
+    # broadcast lr_t to vector
+    lrt_broad = lr_t.expand_as(var)
+    v_mul_left = torch.mul(lrt_broad, v_div_left)
+    # `formula; (epsilon + sqrt(v_t))`
+    v_add_sqrt_v = _inner_eps_add_sqrt_vt_compute(epsilon, v_t)
+    # `formula; lr_t * (m_t*beta1 + (1-beta1)*grad) / (epsilon + sqrt(v_t))`
+    v_div_res = torch.div(v_mul_left, v_add_sqrt_v)
+    # `formula; var - lr_t * (m_t*beta1 + (1-beta1)*grad) / (epsilon + sqrt(v_t))`
+    v_t = torch.sub(var, v_div_res)
+    return v_t
+def _output_var_t_compute(var, lr_t, m_t, epsilon, v_t):
+    """
+    _output_var_t_compute
+    `var_t = var - lr_t * m_t / (epsilon + sqrt(v_t))`
+    """
+    # `formula; lr_t * m_t`
+    lr_t = lr_t.to(m_t.device)
+    v_mul_left = torch.mul(lr_t, m_t)
+    # `formula; (epsilon + sqrt(v_t))`
+    v_add_sqrt_v = _inner_eps_add_sqrt_vt_compute(epsilon, v_t)
+    # `formula; lr_t * m_t /(epsilon + sqrt(v_t))`
+    v_div_res = torch.div(v_mul_left, v_add_sqrt_v)
+    # `formula; var - lr_t * m_t / (epsilon + sqrt(v_t))`
+    v_t = torch.sub(var, v_div_res)
+    return v_t
+def npu_apply_adam(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, use_locking, use_nesterov, out):
+    var, m, v = out
+    input_dtype = m.dtype
+    beta1_tensor = torch.tensor(beta1, dtype=input_dtype).to(m.device)
+    beta1_broad = beta1_tensor.expand_as(m)
+    m_t = _output_m_compute(m, beta1_broad, grad)
+    v_t = _output_v_compute(v, beta2, grad)
+    lr_t = _inner_lr_compute(lr, beta2_power, beta1_power, grad)
+    if use_nesterov:
+        var_params = VarParams(var, lr_t, m_t, beta1_broad, grad, epsilon, v_t)
+        var_t = _output_var_t_compute_use_nesterov(var_params)
+    else:
+        var_t = _output_var_t_compute(var, lr_t, m_t, epsilon, v_t)
+    return var_t, m_t, v_t

msprobe/pytorch/bench_functions/group_norm_silu.py ADDED Viewed

@@ -0,0 +1,27 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+def npu_group_norm_silu(x, gama, beta, group, eps):
+    if len(x.shape) != 4:
+        raise ValueError("x shape should be (N, C, H, W)")
+    res = torch.ops.aten.native_group_norm(x, gama, beta, x.shape[0], x.shape[1], x.shape[2] * x.shape[3], group, eps)
+    res = list(res)
+    if not res:
+        raise ValueError("run native_group_norm failed")
+    res[0] = torch.nn.functional.silu(res[0])
+    return res

msprobe/pytorch/{parse.py → bench_functions/mish.py} RENAMED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -13,7 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from msprobe.pytorch.parse_tool import cli
+import torch
-if __name__ == '__main__':
-    cli.parse()
+def npu_mish(x):
+    mish = torch.nn.Mish()
+    return mish(x)

msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py ADDED Viewed

@@ -0,0 +1,50 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import numpy as np
+def softmax_func(x, axis=None):
+    x = x.float()
+    x_max = x.max(dim=axis, keepdims=True).values
+    x_sub = x - x_max
+    y = torch.exp(x_sub)
+    x_sum = y.sum(dim=axis, keepdims=True)
+    ans = 0 if (x_sum == 0).any() else y / x_sum
+    return ans
+def npu_moe_gating_top_k_softmax(x, finished_optional, k):
+    input_dtype = x.dtype
+    if x.dim() < 1:
+        raise ValueError("Input x must have at least 1 dimensions.")
+    num_expert = x.shape[-1]
+    softmax = softmax_func(x, -1)
+    softmax = softmax.to(input_dtype)
+    expert_idx = torch.argsort(-softmax, dim=-1, stable=True)
+    expert_idx = expert_idx[:, :k]
+    y = torch.gather(softmax, index=expert_idx, dim=-1)
+    if finished_optional is not None:
+        if finished_optional.dim() < 1:
+            raise ValueError("Finished_optional must have at least 1 dimensions.")
+        finished_optional = finished_optional.view(finished_optional.shape[0], 1)
+        finished_optional = finished_optional.expand(-1, k)
+        expert_idx = torch.where(finished_optional, num_expert, expert_idx)
+    if y.dim() < 2:
+        raise ValueError("Variable y must have at least 2 dimensions.")
+    row_idx = torch.arange(y.shape[0] * y.shape[1]).reshape(y.shape[1], y.shape[0]).t()
+    return y, expert_idx, row_idx

msprobe/pytorch/bench_functions/sort_v2.py ADDED Viewed

@@ -0,0 +1,21 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+def npu_sort_v2(x, dim=-1, descending=False, out=None):
+    y, _ = torch.sort(x, dim=dim, descending=descending)
+    return y

msprobe/pytorch/common/utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -18,6 +18,7 @@ import os
 import pickle
 import random
 import stat
+import inspect
 from functools import wraps
 import numpy as np
@@ -27,7 +28,7 @@ from msprobe.core.common.exceptions import DistributedNotInitializedError
 from msprobe.core.common.file_utils import (FileCheckConst, change_mode,
                                             check_file_or_directory_path, check_path_before_create, FileOpen)
 from msprobe.core.common.log import logger
-from msprobe.core.common.utils import check_seed_all
+from msprobe.core.common.utils import check_seed_all, is_save_variable_valid
 from packaging import version
 try:
@@ -56,7 +57,7 @@ def parameter_adapter(func):
     @wraps(func)
     def inner(self, *args, **kwargs):
-        if self.op_name_ == "__getitem__" and len(args) > 1 and isinstance(args[1], torch.Tensor):
+        if self.api_name == "__getitem__" and len(args) > 1 and isinstance(args[1], torch.Tensor):
             input_tensor = args[0]
             indices = args[1]
             if indices.dtype == torch.uint8:
@@ -76,7 +77,7 @@ def parameter_adapter(func):
                 else:
                     res = [input_tensor[tensor_index] for tensor_index in indices]
                     return getattr(torch._C._VariableFunctionsClass, "stack")(res, 0)
-        if self.op_name_ == "__eq__" and len(args) > 1 and args[1] is None:
+        if self.api_name == "__eq__" and len(args) > 1 and args[1] is None:
             return False
         return func(self, *args, **kwargs)
@@ -260,6 +261,10 @@ class Const:
     NPU = 'NPU'
     DISTRIBUTED = 'Distributed'
+    HIFLOAT8_TYPE = "torch_npu.HiFloat8Tensor"
+    FLOAT8_E5M2_TYPE = "torch.float8_e5m2"
+    FLOAT8_E4M3FN_TYPE = "torch.float8_e4m3fn"
     RAISE_PRECISION = {
         torch.float16: torch.float32,
         torch.bfloat16: torch.float32,
@@ -402,3 +407,91 @@ def load_api_data(api_data_bytes):
     except Exception as e:
         raise RuntimeError(f"load api_data from bytes failed") from e
     return buffer
+def is_recomputation():
+    """Check if the current operation is in the re-computation phase.
+    This function inspects the current call stack to indicate whether the current operation is in the
+    re-computation phase. A blacklist mechanism is used; the Megatron and MindSpeed frameworks are currently supported.
+    megatron: The 'backward' function is called by the 'torch/autograd/function.py' file.
+    mindspeed: The 'checkpoint_function_backward' function is called by the 'torch/autograd/function.py'
+    file, or a custom module (using CheckpointWithoutOutput) executes the 'recompute_fn' function within the
+    'torch/utils/checkpoint.py' file.
+    Returns:
+        bool: True if in the re-computation phase, False otherwise.
+    """
+    backward_function_indices = []
+    try:
+        call_stack = inspect.stack()
+    except Exception as e:
+        logger.warning(f"Failed to capture stack trace, recomputation validation may be incorrect, error info: {e}.")
+        return False
+    # Identify whether the function 'recompute_fn' is being executed within the 'torch/utils/checkpoint.py' file.
+    for frame_info in call_stack:
+        if frame_info.function == "recompute_fn" and frame_info.filename.endswith('torch/utils/checkpoint.py'):
+            del call_stack
+            return True
+    # Identify indices in the call stack where the specific function is being executed
+    for idx, frame_info in enumerate(call_stack):
+        if frame_info.function == Const.BACKWARD or frame_info.function == 'checkpoint_function_backward':
+            backward_function_indices.append(idx)
+    # Check if the execution is within 'torch/autograd/function.py' file
+    for idx in backward_function_indices:
+        # The Megatron and MindSpeed L0&L1 scenes
+        if idx + 1 < len(call_stack) and call_stack[idx + 1].filename.endswith('torch/autograd/function.py'):
+            del call_stack
+            return True
+        # The latest MindSpeed L2 and ModelLink scenes
+        if idx + 2 < len(call_stack) and call_stack[idx + 2].filename.endswith('torch/autograd/function.py'):
+            del call_stack
+            return True
+    del call_stack
+    return False
+def check_save_param(variable, name, save_backward):
+    # Raises ValueError on invalid arguments; wrap calls to this function in try/except to skip the invalid save.
+    valid_data_types = tuple([torch.Tensor, int, float, str])
+    if not is_save_variable_valid(variable, valid_data_types):
+        valid_data_types_with_nested_types = valid_data_types + (dict, tuple, list)
+        logger.warning("PrecisionDebugger.save variable type not valid, "
+                       f"should be one of {valid_data_types_with_nested_types}"
+                       "Skip current save process.")
+        raise ValueError
+    if not isinstance(name, str):
+        logger.warning("PrecisionDebugger.save name not valid, "
+                       "should be string. "
+                       "skip current save process.")
+        raise ValueError
+    if not isinstance(save_backward, bool):
+        logger.warning("PrecisionDebugger.save_backward name not valid, "
+                       "should be bool. "
+                       "Skip current save process.")
+        raise ValueError
+def replace_last_occurrence(text, old, new):
+    if text is None:
+        return text
+    index = text.rfind(old)
+    if index != -1:
+        return text[:index] + text[index:].replace(old, new, 1)
+    return text
+def is_hifloat8_tensor(tensor):
+    if not is_gpu and hasattr(torch_npu, "HiFloat8Tensor") and isinstance(tensor, torch_npu.HiFloat8Tensor):
+        return True
+    return False
+def is_float8_tensor(tensor):
+    if str(tensor.dtype) in [Const.FLOAT8_E5M2_TYPE, Const.FLOAT8_E4M3FN_TYPE]:
+        return True
+    return is_hifloat8_tensor(tensor)

msprobe/pytorch/debugger/debugger_config.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -26,7 +26,7 @@ class DebuggerConfig:
         self.task = task or common_config.task or Const.STATISTICS
         self.rank = common_config.rank if common_config.rank else []
         self.step = common_config.step if common_config.step else []
-        self.level = level or common_config.level or "L1"
+        self.level = level or common_config.level or Const.LEVEL_L1
         self.enable_dataloader = common_config.enable_dataloader
         self.scope = task_config.scope if task_config.scope else []
         self.list = task_config.list if task_config.list else []
@@ -36,10 +36,6 @@ class DebuggerConfig:
         self.framework = Const.PT_FRAMEWORK
         self.async_dump = common_config.async_dump if common_config.async_dump else False
-        if self.level == Const.LEVEL_L2:
-            self.is_backward_kernel_dump = False
-            self._check_and_adjust_config_with_l2()
         if self.task == Const.FREE_BENCHMARK:
             self.fuzz_device = task_config.fuzz_device
             self.handler_type = task_config.handler_type
@@ -65,6 +61,10 @@ class DebuggerConfig:
         self.check()
+        if self.level == Const.LEVEL_L2:
+            self.is_backward_kernel_dump = False
+            self._check_and_adjust_config_with_l2()
     def check_kwargs(self):
         if self.task and self.task not in Const.TASK_LIST:
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
@@ -78,6 +78,16 @@ class DebuggerConfig:
         if not isinstance(self.async_dump, bool):
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
                                    f"The parameters async_dump should be bool.")
+        if self.async_dump and self.task == Const.TENSOR and not self.list:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The parameters async_dump is true in tensor task, the parameters list cannot be "
+                                   f"empty.")
+        if self.task == Const.STRUCTURE and self.level not in [Const.LEVEL_L0, Const.LEVEL_MIX]:
+            logger.warning_on_rank_0(
+                f"When the task is set to structure, the level should be one of {[Const.LEVEL_L0, Const.LEVEL_MIX]}. "
+                f"If not, the default level is {Const.LEVEL_MIX}."
+            )
+            self.level = Const.LEVEL_MIX
     def check(self):
         self.check_kwargs()
@@ -93,10 +103,10 @@ class DebuggerConfig:
             logger.error_on_rank_0(
                 f"For level {self.level}, PrecisionDebugger or start interface must receive a 'model' parameter.")
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, f"missing the parameter 'model'")
         instance.model = start_model if start_model is not None else instance.model
         if isinstance(instance.model, torch.nn.Module):
-            return
+            return
         error_model = None
         if isinstance(instance.model, (list, tuple)):
@@ -108,7 +118,7 @@ class DebuggerConfig:
             error_model = instance.model
         if error_model is not None:
-            error_info = (f"The 'model' parameter must be a torch.nn.Moudle or list[torch.nn.Moudle] "
+            error_info = (f"The 'model' parameter must be a torch.nn.Module or list[torch.nn.Module] "
                           f"type, currently there is a {type(error_model)} type.")
             raise MsprobeException(
                 MsprobeException.INVALID_PARAM_ERROR, error_info)

mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl