PyPI - mindstudio-probe - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
msprobe/README.md +39 -3
msprobe/config.json +1 -3
msprobe/core/advisor/advisor.py +8 -3
msprobe/core/common/const.py +113 -13
msprobe/core/common/exceptions.py +25 -3
msprobe/core/common/file_utils.py +150 -26
msprobe/core/common/inplace_op_checker.py +15 -0
msprobe/core/common/log.py +27 -9
msprobe/core/common/utils.py +182 -69
msprobe/core/common_config.py +44 -15
msprobe/core/compare/acc_compare.py +207 -142
msprobe/core/compare/check.py +2 -5
msprobe/core/compare/compare_cli.py +21 -4
msprobe/core/compare/highlight.py +124 -55
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
msprobe/core/compare/npy_compare.py +52 -23
msprobe/core/compare/utils.py +272 -247
msprobe/core/data_dump/data_collector.py +13 -11
msprobe/core/data_dump/data_processor/base.py +46 -16
msprobe/core/data_dump/data_processor/mindspore_processor.py +4 -4
msprobe/core/data_dump/data_processor/pytorch_processor.py +156 -59
msprobe/core/data_dump/scope.py +113 -34
msprobe/core/grad_probe/constant.py +27 -13
msprobe/core/grad_probe/grad_compare.py +18 -1
msprobe/core/grad_probe/utils.py +30 -2
msprobe/core/overflow_check/abnormal_scene.py +185 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +10 -0
msprobe/docs/02.config_introduction.md +49 -22
msprobe/docs/03.config_examples.md +2 -9
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +3 -1
msprobe/docs/06.data_dump_MindSpore.md +157 -90
msprobe/docs/07.accuracy_checker_PyTorch.md +12 -12
msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +19 -13
msprobe/docs/11.accuracy_compare_MindSpore.md +104 -13
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
msprobe/docs/17.grad_probe.md +5 -6
msprobe/docs/19.monitor.md +468 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +386 -0
msprobe/docs/22.visualization_MindSpore.md +384 -0
msprobe/docs/23.tool_function_introduction.md +28 -0
msprobe/docs/FAQ.md +3 -0
msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/mindspore/__init__.py +15 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +113 -145
msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
msprobe/mindspore/api_accuracy_checker/main.py +27 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +33 -12
msprobe/mindspore/common/const.py +33 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +43 -4
msprobe/mindspore/compare/distributed_compare.py +22 -22
msprobe/mindspore/compare/ms_compare.py +271 -248
msprobe/mindspore/compare/ms_graph_compare.py +81 -47
msprobe/mindspore/debugger/debugger_config.py +4 -1
msprobe/mindspore/debugger/precision_debugger.py +7 -1
msprobe/mindspore/dump/dump_tool_factory.py +3 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +12 -2
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +13 -16
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +25 -0
msprobe/mindspore/dump/jit_dump.py +17 -5
msprobe/mindspore/dump/kernel_graph_dump.py +2 -4
msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +145 -39
msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
msprobe/mindspore/free_benchmark/common/utils.py +19 -4
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
msprobe/mindspore/grad_probe/global_context.py +28 -8
msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +24 -10
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/ms_config.py +22 -15
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +2 -4
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +36 -30
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +24 -7
msprobe/pytorch/__init__.py +3 -2
msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +6 -1
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +19 -14
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +13 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +77 -53
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +15 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +100 -6
msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
msprobe/pytorch/bench_functions/swiglu.py +10 -2
msprobe/pytorch/common/parse_json.py +6 -6
msprobe/pytorch/common/utils.py +56 -5
msprobe/pytorch/compare/distributed_compare.py +8 -9
msprobe/pytorch/compare/pt_compare.py +8 -6
msprobe/pytorch/debugger/debugger_config.py +19 -15
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +15 -0
msprobe/pytorch/free_benchmark/common/params.py +8 -1
msprobe/pytorch/free_benchmark/common/utils.py +26 -4
msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -3
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +10 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
msprobe/pytorch/hook_module/wrap_functional.py +14 -12
msprobe/pytorch/module_processer.py +2 -5
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +340 -0
msprobe/pytorch/monitor/distributed/__init__.py +0 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +870 -0
msprobe/pytorch/monitor/module_metric.py +193 -0
msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
msprobe/pytorch/monitor/optimizer_collect.py +295 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
msprobe/pytorch/monitor/utils.py +250 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +29 -38
msprobe/pytorch/online_dispatch/dispatch.py +50 -25
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +53 -32
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
msprobe/pytorch/online_dispatch/utils.py +49 -21
msprobe/pytorch/parse_tool/lib/compare.py +12 -18
msprobe/pytorch/parse_tool/lib/config.py +1 -1
msprobe/pytorch/parse_tool/lib/parse_tool.py +1 -2
msprobe/pytorch/parse_tool/lib/utils.py +16 -35
msprobe/pytorch/parse_tool/lib/visualization.py +2 -0
msprobe/pytorch/pt_config.py +31 -8
msprobe/pytorch/service.py +15 -5
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +165 -0
msprobe/visualization/builder/msprobe_adapter.py +205 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +130 -0
msprobe/visualization/compare/mode_adapter.py +211 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +124 -0
msprobe/visualization/graph/graph.py +200 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +214 -0
msprobe/visualization/utils.py +232 -0
mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
msprobe/docs/04.acl_config_examples.md +0 -78
msprobe/mindspore/compare/layer_mapping.py +0 -146
msprobe/mindspore/compare/modify_mapping.py +0 -107
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0

msprobe/pytorch/bench_functions/npu_fusion_attention.py CHANGED Viewed

@@ -50,8 +50,8 @@ else:
 from msprobe.pytorch.common.utils import logger
 from msprobe.core.common.const import Const, CompareConst
-gtype = torch.float64  # arm host必须选择float64，x86环境选择float32即可，64也行。arm计算很慢，s=8k的场景建议使用x86
-softmax_build_mode = "QKV"  # "MAX_SUM"
+GTYPE = torch.float64  # arm host必须选择float64，x86环境选择float32即可，64也行。arm计算很慢，s=8k的场景建议使用x86
+SOFTMAX_BUILD_MODE = "QKV"  # "MAX_SUM"
 def softmax_forward(x):
@@ -166,6 +166,18 @@ def parse_bsnd_args(query, key, head_num, input_layout):
 def convert_from_bnsd(_input, input_layout):
+    """
+    transform qkv from bnsd to input_layout.
+    B: batch_size
+    S: sequence_length
+    N: num_heads
+    D: head_dim
+    Args:
+       _input (torch.Tensor): tensor of shape (B,N,S,D)
+        input_layout (str): "BSH" or "SBH" or "BSND" or "BNSD" or "TND"
+    Returns:
+        tensor of shape (B,N,S,D) or (B,S,N,D) or (S,B,H) or (B,S,H)
+    """
     if input_layout == "BSH":
         # (B,N,S,D)=>(B,S,N*D)
         out = rearrange(_input, 'b n s d -> b s (n d)').contiguous()
@@ -183,7 +195,19 @@ def convert_from_bnsd(_input, input_layout):
 def convert_to_bnsd(_input, n, input_layout):
-    # 默认"BNSD"无需处理
+    """
+    transform qkv from input_layout to bnsd.
+    B: batch_size
+    S: sequence_length
+    N: num_heads
+    D: head_dim
+    Args:
+        _input (torch.Tensor): tensor of shape (B,N,S,D) or (B,S,N,D) or (S,B,H) or (B,S,H)
+        n (int): num_heads
+        input_layout (str):"BSH" or "SBH" or "BSND" or "BNSD" or "TND"
+    Returns:
+        tensor of shape (B,N,S,D)
+    """
     if input_layout == "BSH":
         # (B,S,N*D)=>(B,N,S,D)
         out = rearrange(_input, 'b s (n d) -> b n s d', n=n)
@@ -199,7 +223,68 @@ def convert_to_bnsd(_input, n, input_layout):
         out = _input
     if out.dim() != 4:
         raise ValueError(f"convert qkv format failed with input_layout {input_layout}.")
-    return out.to(gtype)
+    return out.to(GTYPE)
+def convert_from_bsnd(_input, input_layout):
+    """
+    transform qkv from bsnd to input_layout.
+    B: batch_size
+    S: sequence_length
+    N: num_heads
+    D: head_dim
+    Args:
+       _input (torch.Tensor): tensor of shape (B,S,N,D)
+        input_layout (str): "BSH" or "SBH" or "BSND" or "BNSD" or "TND"
+    Returns:
+        tensor of shape (B,N,S,D) or (B,S,N,D) or (S,B,H) or (B,S,H)
+    """
+    if input_layout == "BSH":
+        # (B,S,N,D)=>(B,S,N*D)
+        out = rearrange(_input, 'b s n d -> b s (n d)').contiguous()
+    elif input_layout == "SBH":
+        # (B,S,N,D)=>(S,B,N*D)
+        out = rearrange(_input, 'b s n d -> s b (n d)').contiguous()
+    elif input_layout == "BNSD":
+        # (B,S,N,D)=>(B,N,S,D)
+        out = rearrange(_input, 'b s n d -> b n s d').contiguous()
+    elif input_layout == "TND":
+        raise ValueError(f"input_layout {input_layout} does not supported for now.")
+    else:
+        out = _input
+    return out
+def convert_to_bsnd(_input, n, input_layout):
+    """
+    transform qkv from input_layout to bsnd.
+    B: batch_size
+    S: sequence_length
+    N: num_heads
+    D: head_dim
+    Args:
+        _input (torch.Tensor): tensor of shape (B,N,S,D) or (B,S,N,D) or (S,B,H) or (B,S,H)
+        n (int): num_heads
+        input_layout (str):"BSH" or "SBH" or "BSND" or "BNSD" or "TND"
+    Returns:
+        tensor of shape (B,S,N,D)
+    """
+    if input_layout == "BSH":
+        # (B,S,N*D)=>(B,S,N,D)
+        out = rearrange(_input, 'b s (n d) -> b s n d', n=n)
+    elif input_layout == "SBH":
+        # (S,B,N*D)=>(B,S,N,D)
+        out = rearrange(_input, 's b (n d) -> b s n d', n=n)
+    elif input_layout == "BNSD":
+        # (B,N,S,D)=>(B,S,N,D)
+        out = rearrange(_input, 'b n s d -> b s n d', n=n)
+    elif input_layout == "TND":
+        raise ValueError(f"input_layout {input_layout} does not supported for now.")
+    else:
+        out = _input
+    if out.dim() != 4:
+        raise ValueError(f"convert qkv format failed with input_layout {input_layout}.")
+    return out
 def generate_atten_mask(*args):
@@ -279,7 +364,7 @@ def rebuid_softmax_by_qkv(q, k, atten_mask, pse, scale):
     """
     logger.info("Using QKV to rebuild original softmax")
     qk = calculate_qk(q, k, atten_mask, pse, scale)
-    softmax_res, x_max, x_sum = softmax_forward(qk)
+    softmax_res, _, _ = softmax_forward(qk)
     return softmax_res
@@ -319,6 +404,10 @@ def get_input_layout(*args, **kwargs):
 def npu_fusion_attention_forward_patch(*args, **kwargs):
+    if len(args) < 2:
+        raise RuntimeError("npu_fusion_attention_forward_patch: length of args should greater than or equal to 2.")
     # query, key, value, head_num, input_layout
     head_num = get_head_num(*args, **kwargs)
     input_layout = get_input_layout(*args, **kwargs)
@@ -454,7 +543,7 @@ def npu_fusion_attention_grad(*args, **kwargs):
     value = convert_to_bnsd(value, n2, input_layout)
     k_new, v_new = generate_kv(key, value, n1, n2)
-    if softmax_build_mode == "QKV":
+    if SOFTMAX_BUILD_MODE == "QKV":
         softmax_res = rebuid_softmax_by_qkv(query, k_new, atten_mask, pse, scale_value)
     else:
         softmax_res = rebuild_softmax_by_max_sum(query, k_new, atten_mask, pse, scale_value, softmax_max, softmax_sum)
@@ -531,8 +620,13 @@ def gpu_fusion_attention(*args, **kwargs):
     else:
         alibi_slopes = None
+    input_layout = get_input_layout(*args, **kwargs)
+    query = convert_to_bsnd(query, n1, input_layout)
+    key = convert_to_bsnd(key, n2, input_layout)
+    value = convert_to_bsnd(value, n2, input_layout)
     out = flash_attn_func(
         query, key, value, dropout_p=(1 - keep_prob), softmax_scale=scale, causal=causal_switch,
         window_size=(window_left, window_right), alibi_slopes=alibi_slopes, deterministic=deterministic
     )
+    out = convert_from_bsnd(out, input_layout)
     return out, Const.NONE, Const.NONE

msprobe/pytorch/bench_functions/rotary_mul.py CHANGED Viewed

@@ -40,6 +40,9 @@ def npu_rotary_mul_backward(dy_tensor, x, r1, r2):
     x_shape = x.shape
     h = x.float()
     grad = dy_tensor.float()
+    if len(r1_shape) < 4 or len(x_shape) < 4:
+        raise RuntimeError(f"Shape of r1 and x should at least be 4-dimension, "
+                           f"but got r1 shape:{r1_shape}, x shape:{x_shape}")
     condition_1 = (r1_shape[0] == 1
                    and r1_shape[1] == x_shape[1]
                    and r1_shape[2] == 1
@@ -68,4 +71,5 @@ def npu_rotary_mul_backward(dy_tensor, x, r1, r2):
             for j in range(x_shape[2]):
                 r2_grad[:, 0, 0, :] += (x_new2[:, i, j, :] * grad[:, i, j, :])
                 r1_grad[:, 0, 0, :] += (h[:, i, j, :] * grad[:, i, j, :])
     return x.grad.cpu(), r1_grad.cpu(), r2_grad.cpu()

msprobe/pytorch/bench_functions/swiglu.py CHANGED Viewed

@@ -19,7 +19,11 @@ import torch
 def npu_swiglu(x, dim=-1):
     tensor_dtype = x.dtype
-    in_tensors = torch.chunk(x, 2, dim=dim)
+    try:
+        in_tensors = torch.chunk(x, 2, dim=dim)
+    except Exception as e:
+        raise RuntimeError(f"Invalid chunk x into 2 tensors with shape {x.shape} and dimension {dim}") from e
     if tensor_dtype == torch.float32:
         tensor_scalar = torch.sigmoid(torch.mul(in_tensors[0], 1.0))
         output_data = torch.mul(torch.mul(tensor_scalar, in_tensors[0]), in_tensors[1])
@@ -34,7 +38,11 @@ def npu_swiglu(x, dim=-1):
 def npu_swiglu_backward(grad, x, dim=-1):
     tensor_dtype = grad.dtype
-    in_tensors = torch.chunk(x, 2, dim=dim)
+    try:
+        in_tensors = torch.chunk(x, 2, dim=dim)
+    except Exception as e:
+        raise RuntimeError(f"Invalid chunk x into 2 tensors with shape {x.shape} and dimension {dim}") from e
     tensor_grad_out = grad
     if tensor_dtype == torch.float16:

msprobe/pytorch/common/parse_json.py CHANGED Viewed

@@ -13,20 +13,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 from msprobe.core.common.exceptions import ParseJsonException
-from msprobe.core.common.file_utils import FileOpen
+from msprobe.core.common.file_utils import load_json
+from msprobe.core.common.log import logger
 def parse_json_info_forward_backward(json_path):
-    with FileOpen(json_path, 'r') as f:
-        dump_json = json.load(f)
+    dump_json = load_json(json_path)
     real_data_path = dump_json.get("dump_data_dir")
     dump_data = dump_json.get("data")
+    if dump_data is None:
+        raise ParseJsonException(ParseJsonException.InvalidDumpJson, "something wrong with dump, no data found in dump.json")
     if not dump_data:
-        raise ParseJsonException(ParseJsonException.InvalidDumpJson, "dump数据中没有data字段")
+        logger.warning("data field is empty, no overflow data found.")
     forward_data = {}
     backward_data = {}

msprobe/pytorch/common/utils.py CHANGED Viewed

@@ -15,6 +15,7 @@
 import io
 import os
+import pickle
 import random
 import stat
 from functools import wraps
@@ -24,7 +25,7 @@ import torch
 import torch.distributed as dist
 from msprobe.core.common.exceptions import DistributedNotInitializedError
 from msprobe.core.common.file_utils import (FileCheckConst, change_mode,
-                                            check_file_or_directory_path, check_path_before_create)
+                                            check_file_or_directory_path, check_path_before_create, FileOpen)
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import check_seed_all
 from packaging import version
@@ -75,7 +76,7 @@ def parameter_adapter(func):
                 else:
                     res = [input_tensor[tensor_index] for tensor_index in indices]
                     return getattr(torch._C._VariableFunctionsClass, "stack")(res, 0)
-        if self.op_name_ == "__eq__" and args[1] is None:
+        if self.op_name_ == "__eq__" and len(args) > 1 and args[1] is None:
             return False
         return func(self, *args, **kwargs)
@@ -269,17 +270,17 @@ def load_pt(pt_path, to_cpu=False):
     check_file_or_directory_path(pt_path)
     try:
         if to_cpu:
-            pt = torch.load(pt_path, map_location=torch.device("cpu"))
+            pt = torch.load(pt_path, map_location=torch.device("cpu"), weights_only=True)
         else:
-            pt = torch.load(pt_path)
+            pt = torch.load(pt_path, weights_only=True)
     except Exception as e:
         raise RuntimeError(f"load pt file {pt_path} failed") from e
     return pt
 def save_pt(tensor, filepath):
-    filepath = os.path.realpath(filepath)
     check_path_before_create(filepath)
+    filepath = os.path.realpath(filepath)
     try:
         torch.save(tensor, filepath)
     except Exception as e:
@@ -290,6 +291,56 @@ def save_pt(tensor, filepath):
     change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY)
+class TypeCheckingUnpickler(pickle.Unpickler):
+    """
+    This class is a subclass of pickle.Unpickler, which is used to unpickle pickled objects.
+    It overrides the find_class method to add type checking functionality.
+    """
+    allowed_types = [
+        "str",
+        "ApiData",
+        "OrderedDict",
+        "_rebuild_tensor_v2",  # from torch.utils
+        "_load_from_bytes"  # from torch.storage
+    ]
+    def find_class(self, module, name):
+        """
+        Method to find the class of the object to be unpickled.
+        Throws pickle.UnpicklingError If the object type is not in the allowed types list.
+        """
+        if name in self.allowed_types:
+            return super().find_class(module, name)
+        raise pickle.UnpicklingError("Unsupported object type: {}.{}".format(module, name))
+def save_pkl(tensor, filepath):
+    """Save ApiData or str objection by pickle"""
+    check_path_before_create(filepath)
+    filepath = os.path.realpath(filepath)
+    try:
+        with FileOpen(filepath, 'wb') as f:
+            pickle.dump(tensor, f)
+    except Exception as e:
+        logger.error("Save pt file failed, please check according possible error causes: "
+                     "1. out of disk space or disk error, "
+                     "2. no permission to write files, etc.")
+        raise RuntimeError(f"save pt file {filepath} failed") from e
+    change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY)
+def load_pkl(pt_path):
+    """Load ApiData or str objection by pickle for accuracy_checker_online"""
+    check_file_or_directory_path(pt_path)
+    pt_path = os.path.realpath(pt_path)
+    try:
+        with FileOpen(pt_path, 'rb') as f:
+            pt = TypeCheckingUnpickler(f).load()
+    except Exception as e:
+        raise RuntimeError(f"load pt file {pt_path} failed: {e}") from e
+    return pt
 def save_api_data(api_data):
     """Save data to io stream"""
     try:

msprobe/pytorch/compare/distributed_compare.py CHANGED Viewed

@@ -15,7 +15,7 @@
 import os
 from msprobe.core.common.utils import CompareException, check_compare_param, \
-    check_configuration_param, task_dumppath_get
+    check_configuration_param, set_dump_path, get_dump_mode
 from msprobe.core.common.file_utils import create_directory
 from msprobe.core.common.exceptions import FileCheckException
 from msprobe.pytorch.common.log import logger
@@ -30,6 +30,7 @@ def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs):
     stack_mode = kwargs.get('stack_mode', False)
     auto_analyze = kwargs.get('auto_analyze', True)
     fuzzy_match = kwargs.get('fuzzy_match', False)
+    is_print_compare_log = kwargs.get('is_print_compare_log', True)
     # get the ranks and match by order
     npu_ranks = sorted(check_and_return_dir_contents(npu_dump_dir, 'rank'))
     bench_ranks = sorted(check_and_return_dir_contents(bench_dump_dir, 'rank'))
@@ -49,18 +50,16 @@ def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs):
             'npu_json_path': npu_path,
             'bench_json_path': bench_path,
             'stack_json_path': stack_path,
-            'is_print_compare_log': True
+            'is_print_compare_log': is_print_compare_log
         }
         try:
-            summary_compare, md5_compare = task_dumppath_get(dump_result_param)
-            check_configuration_param(stack_mode, auto_analyze, fuzzy_match,
-                                      dump_result_param.get('is_print_compare_log', True))
+            set_dump_path(dump_result_param)
+            dump_mode = get_dump_mode(dump_result_param)
+            check_configuration_param(stack_mode, auto_analyze, fuzzy_match, is_print_compare_log)
             create_directory(output_path)
-            check_compare_param(dump_result_param, output_path,
-                                summary_compare=summary_compare, md5_compare=md5_compare)
+            check_compare_param(dump_result_param, output_path, dump_mode)
         except (CompareException, FileCheckException) as error:
             logger.error('Compare failed. Please check the arguments and do it again!')
             raise CompareException(error.code) from error
         pt_comparator = PTComparator()
-        pt_comparator.compare_core(dump_result_param, output_path, suffix=f'_{nr}-{br}',
-                                   summary_compare=summary_compare, md5_compare=md5_compare, **kwargs)
+        pt_comparator.compare_core(dump_result_param, output_path, suffix=f'_{nr}-{br}', dump_mode=dump_mode, **kwargs)

msprobe/pytorch/compare/pt_compare.py CHANGED Viewed

@@ -19,8 +19,8 @@ from msprobe.core.common.const import FileCheckConst
 from msprobe.pytorch.common.log import logger
 from msprobe.core.common.exceptions import FileCheckException
 from msprobe.core.compare.acc_compare import Comparator
-from msprobe.core.common.utils import check_configuration_param, task_dumppath_get, check_compare_param, \
-    CompareException
+from msprobe.core.common.utils import check_configuration_param, check_compare_param, \
+    CompareException, set_dump_path, get_dump_mode
 from msprobe.core.common.file_utils import FileChecker, create_directory, load_yaml
 from msprobe.pytorch.common.utils import load_pt
@@ -45,6 +45,8 @@ class PTComparator (Comparator):
         return mapping_dict
     def read_npy_data(self, dir_path, file_name):
+        if not file_name:
+            return None
         data_path = os.path.join(dir_path, file_name)
         path_checker = FileChecker(data_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE,
                                 FileCheckConst.PT_SUFFIX, False)
@@ -68,15 +70,15 @@ class PTComparator (Comparator):
 def compare(input_param, output_path, stack_mode=False, auto_analyze=True, fuzzy_match=False, **kwargs):
     try:
-        summary_compare, md5_compare = task_dumppath_get(input_param)
+        set_dump_path(input_param)
+        dump_mode = get_dump_mode(input_param)
         check_configuration_param(stack_mode, auto_analyze, fuzzy_match, input_param.get('is_print_compare_log', True))
         create_directory(output_path)
-        check_compare_param(input_param, output_path, summary_compare, md5_compare)
+        check_compare_param(input_param, output_path, dump_mode)
         data_mapping = kwargs.get('data_mapping', None)
     except (CompareException, FileCheckException) as error:
         logger.error('Compare failed. Please check the arguments and do it again!')
         raise CompareException(error.code) from error
     pt_comparator = PTComparator(data_mapping)
     pt_comparator.compare_core(input_param, output_path, stack_mode=stack_mode,
-                 auto_analyze=auto_analyze, fuzzy_match=fuzzy_match, summary_compare=summary_compare,
-                 md5_compare=md5_compare)
+                 auto_analyze=auto_analyze, fuzzy_match=fuzzy_match, dump_mode=dump_mode)

msprobe/pytorch/debugger/debugger_config.py CHANGED Viewed

@@ -31,14 +31,14 @@ class DebuggerConfig:
         self.scope = task_config.scope if task_config.scope else []
         self.list = task_config.list if task_config.list else []
         self.data_mode = task_config.data_mode if task_config.data_mode else ["all"]
-        self.backward_input_list = task_config.backward_input if task_config.backward_input else []
-        self.backward_input = {}
-        self.acl_config = common_config.acl_config if common_config.acl_config else ""
-        self.is_forward_acl_dump = True
         self.summary_mode = task_config.summary_mode if task_config.summary_mode else Const.STATISTICS
         self.overflow_nums = task_config.overflow_nums if task_config.overflow_nums else 1
         self.framework = Const.PT_FRAMEWORK
+        if self.level == Const.LEVEL_L2:
+            self.is_backward_kernel_dump = False
+            self._check_and_adjust_config_with_l2()
         if self.task == Const.FREE_BENCHMARK:
             self.fuzz_device = task_config.fuzz_device
             self.handler_type = task_config.handler_type
@@ -59,20 +59,11 @@ class DebuggerConfig:
             self.tls_path = task_config.tls_path if task_config.tls_path else ""
             self.host = task_config.host if task_config.host else ""
             self.port = task_config.port if task_config.port else -1
+            self.online_run_ut_recompute = task_config.online_run_ut_recompute \
+                if isinstance(task_config.online_run_ut_recompute, bool) else False
         self.check()
-        if self.level == "L2":
-            if not self.scope or not isinstance(self.scope, list) or len(self.scope) != 1:
-                raise ValueError("scope must be configured as a list with one api name")
-            if isinstance(self.scope[0], str) and Const.BACKWARD in self.scope[0] and not self.backward_input_list:
-                raise ValueError("backward_input must be configured when scope contains 'backward'")
-            if Const.BACKWARD in self.scope[0]:
-                self.is_forward_acl_dump = False
-                for index, scope_spec in enumerate(self.scope):
-                    self.scope[index] = scope_spec.replace(Const.BACKWARD, Const.FORWARD)
-                    self.backward_input[self.scope[index]] = self.backward_input_list[index]
     def check_kwargs(self):
         if self.task and self.task not in Const.TASK_LIST:
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
@@ -106,3 +97,16 @@ class DebuggerConfig:
             logger.error_on_rank_0(f"The 'model' parameter of start must be a torch.nn.Module type.")
             raise MsprobeException(
                 MsprobeException.INVALID_PARAM_ERROR, f"model must be a torch.nn.Module")
+    def _check_and_adjust_config_with_l2(self):
+        if self.scope:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"When level is set to L2, the scope cannot be configured.")
+        if not self.list or len(self.list) != 1:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"When level is set to L2, the list must be configured as a list with one api name.")
+        api_name = self.list[0]
+        if api_name.endswith(Const.BACKWARD):
+            self.is_backward_kernel_dump = True
+            api_forward_name = api_name[:-len(Const.BACKWARD)] + Const.FORWARD
+            self.list.append(api_forward_name)

msprobe/pytorch/dump/kernel_dump/kernel_config.py ADDED Viewed

@@ -0,0 +1,33 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from msprobe.core.common.file_utils import save_json
+def create_kernel_config_json(dump_path, cur_rank):
+    kernel_config_name = "kernel_config.json" if cur_rank == '' else f"kernel_config_{cur_rank}.json"
+    kernel_config_path = os.path.join(dump_path, kernel_config_name)
+    config_info = {
+        "dump": {
+            "dump_list": [],
+            "dump_path": dump_path,
+            "dump_mode": "all",
+            "dump_op_switch": "on"
+        }
+    }
+    save_json(kernel_config_path, config_info, indent=4)
+    return kernel_config_path

msprobe/pytorch/free_benchmark/common/constant.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Dict
 import numpy as np

msprobe/pytorch/free_benchmark/common/counter.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from collections import defaultdict
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig

msprobe/pytorch/free_benchmark/common/enums.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from msprobe.core.common.const import Const

msprobe/pytorch/free_benchmark/common/params.py CHANGED Viewed

@@ -17,6 +17,7 @@ from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Tuple
 import torch
+from msprobe.core.common.exceptions import FreeBenchmarkException
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.enums import (
     DeviceType,
@@ -128,7 +129,13 @@ def make_unequal_row(
         row.max_rel = ratio - 1
     origin_tensor = data_params.original_result
     perturbed_tensor = data_params.perturbed_result
-    if index:
+    if index is not None:
+        if index >= len(origin_tensor) or index >= len(perturbed_tensor):
+            err_msg = f"When generating unequal results, index {index} of output is out of bounds. please check!"
+            raise FreeBenchmarkException(
+                FreeBenchmarkException.OutputIndexError,
+                error_info=err_msg,
+            )
         origin_tensor = origin_tensor[index]
         perturbed_tensor = perturbed_tensor[index]
         row.output_index = index

msprobe/pytorch/free_benchmark/common/utils.py CHANGED Viewed

@@ -13,7 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import torch
+from msprobe.core.common.exceptions import FreeBenchmarkException
+from msprobe.core.common.utils import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark.common.enums import DeviceType
@@ -51,6 +54,7 @@ class Tools:
         return api_name.rsplit(".", 2)[0]
     @staticmethod
+    @recursion_depth_decorator("FreeBenchmark: Tools.convert_device_and_dtype")
     def convert_device_and_dtype(
         tensor_seq, device: str = DeviceType.CPU, change_dtype: bool = False
     ):
@@ -73,23 +77,41 @@ class Tools:
         return tensor_seq
     @staticmethod
+    @recursion_depth_decorator("FreeBenchmark: Tools.convert_fuzz_output_to_origin")
     def convert_fuzz_output_to_origin(origin, perturbed):
-        if isinstance(origin, torch.Tensor):
+        if isinstance(origin, torch.Tensor) and isinstance(perturbed, torch.Tensor):
             origin.data = perturbed.to(origin.dtype).to(origin.device)
             return origin
-        if isinstance(origin, dict):
+        if isinstance(origin, dict) and isinstance(perturbed, dict):
             output = dict()
             for key, value in origin.items():
+                if key not in perturbed:
+                    err_msg = f"'{key}' not in perturbed output."
+                    raise FreeBenchmarkException(
+                        FreeBenchmarkException.InvalidPerturbedOutput,
+                        error_info=err_msg,
+                    )
                 output[key] = Tools.convert_fuzz_output_to_origin(value, perturbed[key])
             return output
-        if isinstance(origin, (tuple, list)):
+        if isinstance(origin, (tuple, list)) and isinstance(perturbed, (tuple, list)):
             result = list()
+            if len(perturbed) != len(origin):
+                err_msg = (
+                    f"length of perturbed output ({len(perturbed)}) is different "
+                    f"from the length of original output ({len(origin)})."
+                )
+                raise FreeBenchmarkException(
+                    FreeBenchmarkException.InvalidPerturbedOutput, error_info=err_msg
+                )
             for index_, value in enumerate(origin):
                 result.append(
                     Tools.convert_fuzz_output_to_origin(value, perturbed[index_])
                 )
             return type(origin)(result)
-        return origin
+        err_msg = f"conversion of two outputs with types ({type(origin)}, {type(perturbed)}) is not supported."
+        raise FreeBenchmarkException(
+            FreeBenchmarkException.UnsupportedType, error_info=err_msg
+        )
 class TorchC:

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl