PyPI - mindstudio-probe - Versions diffs - 1.1.1__py3-none-any.whl → 1.2.2__py3-none-any.whl - Mend

mindstudio-probe 1.1.1-py3-none-any.whl → 1.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (226) hide show

{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/METADATA +3 -2
mindstudio_probe-1.2.2.dist-info/RECORD +415 -0
msprobe/CMakeLists.txt +5 -0
msprobe/README.md +16 -21
msprobe/config.json +1 -0
msprobe/core/common/const.py +185 -11
msprobe/core/common/exceptions.py +3 -1
msprobe/core/common/file_utils.py +33 -7
msprobe/core/common/inplace_ops.yaml +4 -0
msprobe/core/common/utils.py +42 -14
msprobe/core/common_config.py +6 -0
msprobe/core/compare/acc_compare.py +139 -128
msprobe/core/compare/check.py +31 -29
msprobe/core/compare/compare_cli.py +17 -16
msprobe/core/compare/highlight.py +186 -99
msprobe/core/compare/layer_mapping/data_scope_parser.py +19 -8
msprobe/core/compare/layer_mapping/layer_mapping.py +21 -14
msprobe/core/compare/layer_mapping/postprocess_pass.py +4 -3
msprobe/core/compare/merge_result/merge_result.py +381 -0
msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
msprobe/core/compare/merge_result/utils.py +81 -0
msprobe/core/compare/multiprocessing_compute.py +2 -2
msprobe/core/compare/npy_compare.py +109 -147
msprobe/core/compare/utils.py +199 -69
msprobe/core/data_dump/data_collector.py +100 -25
msprobe/core/data_dump/data_processor/base.py +130 -28
msprobe/core/data_dump/data_processor/factory.py +8 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +170 -23
msprobe/core/data_dump/data_processor/pytorch_processor.py +175 -64
msprobe/core/data_dump/json_writer.py +54 -8
msprobe/core/data_dump/scope.py +19 -18
msprobe/core/overflow_check/abnormal_scene.py +9 -5
msprobe/core/overflow_check/checker.py +1 -1
msprobe/core/overflow_check/utils.py +1 -1
msprobe/docs/01.installation.md +121 -17
msprobe/docs/02.config_introduction.md +18 -16
msprobe/docs/03.config_examples.md +24 -0
msprobe/docs/05.data_dump_PyTorch.md +107 -58
msprobe/docs/06.data_dump_MindSpore.md +95 -34
msprobe/docs/07.accuracy_checker_PyTorch.md +18 -18
msprobe/docs/09.accuracy_checker_MindSpore.md +8 -6
msprobe/docs/10.accuracy_compare_PyTorch.md +99 -41
msprobe/docs/11.accuracy_compare_MindSpore.md +249 -48
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/19.monitor.md +310 -220
msprobe/docs/21.visualization_PyTorch.md +125 -35
msprobe/docs/22.visualization_MindSpore.md +149 -41
msprobe/docs/23.generate_operator_PyTorch.md +107 -0
msprobe/docs/24.code_mapping_Mindspore.md +28 -0
msprobe/docs/{23.tool_function_introduction.md → 25.tool_function_introduction.md} +1 -0
msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
msprobe/docs/27.dump_json_instruction.md +525 -0
msprobe/docs/28.debugger_save_instruction.md +94 -0
msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
msprobe/docs/FAQ.md +26 -2
msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/monitor/step_count_per_record.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
msprobe/docs/img/visualization/tensorboard_1.png +0 -0
msprobe/docs/img/visualization/tensorboard_2.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_browser_2.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/docs/visualization/GPTModel.png +0 -0
msprobe/docs/visualization/ParallelMLP.png +0 -0
msprobe/docs/visualization/layer_mapping_example.md +132 -0
msprobe/docs/visualization/mapping.png +0 -0
msprobe/docs/visualization/mapping1.png +0 -0
msprobe/docs/visualization/module_name.png +0 -0
msprobe/docs/visualization/module_name1.png +0 -0
msprobe/docs/visualization/no_mapping.png +0 -0
msprobe/docs/visualization/no_mapping1.png +0 -0
msprobe/docs/visualization/no_mapping_analyze.png +0 -0
msprobe/docs/visualization/top_layer.png +0 -0
msprobe/mindspore/__init__.py +11 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +80 -28
msprobe/mindspore/api_accuracy_checker/api_runner.py +54 -16
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +2 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +52 -8
msprobe/mindspore/api_accuracy_checker/data_manager.py +37 -0
msprobe/mindspore/api_accuracy_checker/main.py +1 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +12 -6
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +3 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +129 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
msprobe/mindspore/code_mapping/bind.py +264 -0
msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
msprobe/mindspore/code_mapping/graph.py +49 -0
msprobe/mindspore/code_mapping/graph_parser.py +226 -0
msprobe/mindspore/code_mapping/main.py +24 -0
msprobe/mindspore/code_mapping/processor.py +34 -0
msprobe/mindspore/common/const.py +3 -1
msprobe/mindspore/common/utils.py +68 -5
msprobe/mindspore/compare/distributed_compare.py +0 -2
msprobe/mindspore/compare/ms_compare.py +105 -63
msprobe/mindspore/compare/ms_graph_compare.py +14 -5
msprobe/mindspore/debugger/debugger_config.py +28 -2
msprobe/mindspore/debugger/precision_debugger.py +100 -12
msprobe/mindspore/dump/hook_cell/api_registry.py +85 -16
msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +33 -15
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +11 -1
msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
msprobe/mindspore/dump/jit_dump.py +7 -6
msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
msprobe/mindspore/dump/kernel_graph_dump.py +7 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +13 -4
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
msprobe/mindspore/grad_probe/grad_analyzer.py +24 -12
msprobe/mindspore/grad_probe/hook.py +13 -4
msprobe/mindspore/mindtorch/__init__.py +18 -0
msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
msprobe/mindspore/monitor/anomaly_detect.py +404 -0
msprobe/mindspore/monitor/distributed/__init__.py +0 -0
msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
msprobe/mindspore/monitor/features.py +63 -0
msprobe/mindspore/monitor/module_hook.py +821 -0
msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
msprobe/mindspore/monitor/utils.py +267 -0
msprobe/mindspore/ms_config.py +13 -3
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +7 -0
msprobe/mindspore/service.py +347 -107
msprobe/msprobe.py +24 -3
msprobe/pytorch/__init__.py +7 -7
msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +100 -267
msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +55 -31
msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +57 -1
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -1
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +42 -14
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +64 -19
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +34 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
msprobe/pytorch/bench_functions/apply_adam.py +215 -0
msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
msprobe/pytorch/bench_functions/mish.py +21 -0
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +44 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +42 -10
msprobe/pytorch/bench_functions/sort_v2.py +21 -0
msprobe/pytorch/common/parse_json.py +2 -1
msprobe/pytorch/common/utils.py +116 -2
msprobe/pytorch/compare/distributed_compare.py +17 -29
msprobe/pytorch/compare/pt_compare.py +40 -20
msprobe/pytorch/debugger/debugger_config.py +42 -17
msprobe/pytorch/debugger/precision_debugger.py +56 -12
msprobe/pytorch/dump/module_dump/__init__.py +0 -0
msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
msprobe/pytorch/dump/module_dump/module_processer.py +204 -0
msprobe/pytorch/free_benchmark/common/params.py +2 -1
msprobe/pytorch/free_benchmark/common/utils.py +3 -0
msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -2
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +31 -47
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
msprobe/pytorch/function_factory.py +7 -1
msprobe/pytorch/hook_module/__init__.py +1 -1
msprobe/pytorch/hook_module/hook_module.py +14 -11
msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +36 -1
msprobe/pytorch/hook_module/wrap_distributed.py +10 -8
msprobe/pytorch/hook_module/wrap_functional.py +0 -40
msprobe/pytorch/monitor/anomaly_analyse.py +1 -1
msprobe/pytorch/monitor/anomaly_detect.py +98 -28
msprobe/pytorch/monitor/csv2tb.py +164 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +25 -14
msprobe/pytorch/monitor/features.py +3 -3
msprobe/pytorch/monitor/module_hook.py +543 -318
msprobe/pytorch/monitor/module_metric.py +27 -48
msprobe/pytorch/monitor/module_spec_verifier.py +3 -1
msprobe/pytorch/monitor/optimizer_collect.py +76 -56
msprobe/pytorch/monitor/unittest/test_monitor.py +24 -9
msprobe/pytorch/monitor/utils.py +84 -48
msprobe/pytorch/online_dispatch/dispatch.py +8 -2
msprobe/pytorch/parse_tool/lib/compare.py +10 -10
msprobe/pytorch/parse_tool/lib/config.py +5 -7
msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
msprobe/pytorch/parse_tool/lib/parse_tool.py +11 -10
msprobe/pytorch/parse_tool/lib/utils.py +18 -19
msprobe/pytorch/parse_tool/lib/visualization.py +9 -10
msprobe/pytorch/pt_config.py +19 -22
msprobe/pytorch/service.py +264 -115
msprobe/visualization/builder/graph_builder.py +93 -10
msprobe/visualization/builder/msprobe_adapter.py +30 -6
msprobe/visualization/compare/graph_comparator.py +64 -14
msprobe/visualization/compare/mode_adapter.py +1 -15
msprobe/visualization/graph/base_node.py +15 -19
msprobe/visualization/graph/distributed_analyzer.py +395 -0
msprobe/visualization/graph/graph.py +9 -0
msprobe/visualization/graph/node_op.py +4 -2
msprobe/visualization/graph_service.py +100 -27
msprobe/visualization/utils.py +24 -31
mindstudio_probe-1.1.1.dist-info/RECORD +0 -341
msprobe/pytorch/functional/module_dump.py +0 -84
msprobe/pytorch/module_processer.py +0 -150
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/top_level.txt +0 -0
/msprobe/docs/{data_dump_Mindspore → data_dump_MindSpore}/dynamic_graph_quick_start_example.md +0 -0
/msprobe/{pytorch/functional → mindspore/code_mapping}/__init__.py +0 -0

msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py ADDED Viewed

@@ -0,0 +1,44 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import numpy as np
+def softmax_func(x, axis=None):
+    x = x.float()
+    x_max = x.max(dim=axis, keepdims=True).values
+    x_sub = x - x_max
+    y = torch.exp(x_sub)
+    x_sum = y.sum(dim=axis, keepdims=True)
+    ans = 0 if (x_sum == 0).any() else y / x_sum
+    return ans
+def npu_moe_gating_top_k_softmax(x, finished_optional, k):
+    input_dtype = x.dtype
+    num_expert = x.shape[-1]
+    softmax = softmax_func(x, -1)
+    softmax = softmax.to(input_dtype)
+    expert_idx = torch.argsort(-softmax, dim=-1, stable=True)
+    expert_idx = expert_idx[:, :k]
+    y = torch.gather(softmax, index=expert_idx, dim=-1)
+    if finished_optional is not None:
+        finished_optional = finished_optional.view(finished_optional.shape[0], 1)
+        finished_optional = finished_optional.expand(-1, k)
+        expert_idx = torch.where(finished_optional, num_expert, expert_idx)
+    row_idx = torch.arange(y.shape[0] * y.shape[1]).reshape(y.shape[1], y.shape[0]).t()
+    return y, expert_idx, row_idx

msprobe/pytorch/bench_functions/npu_fusion_attention.py CHANGED Viewed

@@ -30,6 +30,7 @@
                                        numels=0, prefix=None, sparse_mode=0, gen_mask_parallel=True, sync=False
 """
+from collections import namedtuple
 import torch
 import numpy as np
 from einops import rearrange
@@ -54,6 +55,14 @@ GTYPE = torch.float64  # arm host必须选择float64，x86环境选择float32即
 SOFTMAX_BUILD_MODE = "QKV"  # "MAX_SUM"
+FaForwardParams = namedtuple("FaForwardParams",
+                            ["q", "k", "v", "drop_mask", "atten_mask", "pse", "scale", "keep_prob"])
+FaBackwardParams = namedtuple("FaBackwardParams",
+                            ["dx", "q", "k", "v", "softmax_res", "drop_mask", "pse", "scale", "keep_prob"])
+RebuildSoftmaxParams = namedtuple("RebuildSoftmaxParams",
+                                ["q", "k", "atten_mask", "pse", "scale", "softmax_max", "softmax_sum"])
 def softmax_forward(x):
     x_max = torch.max(x, dim=-1, keepdims=True)[0]
     x_sub = x.sub(x_max)
@@ -99,7 +108,15 @@ def calculate_qk(q, k, atten_mask, pse, scale):
     return qk
-def fusion_attention_forward(q, k, v, drop_mask, atten_mask, pse, scale, keep_prob):
+def fusion_attention_forward(forward_params):
+    q = forward_params.q
+    k = forward_params.k
+    v = forward_params.v
+    drop_mask = forward_params.drop_mask
+    atten_mask = forward_params.atten_mask
+    pse = forward_params.pse
+    scale = forward_params.scale
+    keep_prob = forward_params.keep_prob
     qk = calculate_qk(q, k, atten_mask, pse, scale)
     softmax_res, softmax_max, softmax_sum = softmax_forward(qk)
     if drop_mask is None or len(drop_mask.shape) == 0:
@@ -110,7 +127,16 @@ def fusion_attention_forward(q, k, v, drop_mask, atten_mask, pse, scale, keep_pr
     return y, softmax_max, softmax_sum
-def fusion_attention_backward(dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob):
+def fusion_attention_backward(backward_params):
+    dx = backward_params.dx
+    q = backward_params.q
+    k = backward_params.k
+    v = backward_params.v
+    softmax_res = backward_params.softmax_res
+    drop_mask = backward_params.drop_mask
+    pse = backward_params.pse
+    scale = backward_params.scale
+    keep_prob = backward_params.keep_prob
     dp = torch.matmul(dx, v.permute(0, 1, 3, 2))
     if drop_mask is None or len(drop_mask.shape) == 0:
         drop_res = softmax_res.permute(0, 1, 3, 2)
@@ -368,11 +394,18 @@ def rebuid_softmax_by_qkv(q, k, atten_mask, pse, scale):
     return softmax_res
-def rebuild_softmax_by_max_sum(q, k, atten_mask, pse, scale, softmax_max, softmax_sum):
+def rebuild_softmax_by_max_sum(softmax_params):
     """
     attention = softmax(QK^T/sqrt(d))V
     softmax(x_i) = e^(x_i - x_max_i) / x_sum_i)
     """
+    q = softmax_params.q
+    k = softmax_params.k
+    atten_mask = softmax_params.atten_mask
+    pse = softmax_params.pse
+    scale = softmax_params.scale
+    softmax_max = softmax_params.softmax_max
+    softmax_sum = softmax_params.softmax_sum
     logger.info("Using softmax_max and softmax_sum to rebuild original softmax")
     qk = calculate_qk(q, k, atten_mask, pse, scale)
     if softmax_max.shape[-1] == 0:
@@ -502,10 +535,8 @@ def npu_fusion_attention(*args, **kwargs):
     key = convert_to_bnsd(key, n2, input_layout)
     value = convert_to_bnsd(value, n2, input_layout)
     k_new, v_new = generate_kv(key, value, n1, n2)
-    out_golden, softmax_max, softmax_sum = fusion_attention_forward(q=query, k=k_new, v=v_new,
-                                                                    drop_mask=None, atten_mask=atten_mask,
-                                                                    pse=pse, scale=scale,
-                                                                    keep_prob=keep_prob)
+    forward_params = FaForwardParams(query, k_new, v_new, None, atten_mask, pse, scale, keep_prob)
+    out_golden, softmax_max, softmax_sum = fusion_attention_forward(forward_params)
     if out_golden.dim() == 5:
         out_golden = out_golden.reshape(out_golden.size(0), out_golden.size(1) * out_golden.size(2), out_golden.size(3),
                                         out_golden.size(4))
@@ -546,9 +577,10 @@ def npu_fusion_attention_grad(*args, **kwargs):
     if SOFTMAX_BUILD_MODE == "QKV":
         softmax_res = rebuid_softmax_by_qkv(query, k_new, atten_mask, pse, scale_value)
     else:
-        softmax_res = rebuild_softmax_by_max_sum(query, k_new, atten_mask, pse, scale_value, softmax_max, softmax_sum)
-    dq, dk, dv = fusion_attention_backward(dx, query, k_new, v_new, softmax_res, None, pse, scale_value, keep_prob)
+        softmax_params = RebuildSoftmaxParams(query, k_new, atten_mask, pse, scale_value, softmax_max, softmax_sum)
+        softmax_res = rebuild_softmax_by_max_sum(softmax_params)
+    backward_params = FaBackwardParams(dx, query, k_new, v_new, softmax_res, None, pse, scale_value, keep_prob)
+    dq, dk, dv = fusion_attention_backward(backward_params)
     # N不等长适配by cdy
     if not (n1 == n2):

msprobe/pytorch/bench_functions/sort_v2.py ADDED Viewed

@@ -0,0 +1,21 @@
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+def npu_sort_v2(x, dim=-1, descending=False, out=None):
+    y, _ = torch.sort(x, dim=dim, descending=descending)
+    return y

msprobe/pytorch/common/parse_json.py CHANGED Viewed

@@ -24,7 +24,8 @@ def parse_json_info_forward_backward(json_path):
     real_data_path = dump_json.get("dump_data_dir")
     dump_data = dump_json.get("data")
     if dump_data is None:
-        raise ParseJsonException(ParseJsonException.InvalidDumpJson, "something wrong with dump, no data found in dump.json")
+        raise ParseJsonException(ParseJsonException.InvalidDumpJson,
+                                 "something wrong with dump, no data found in dump.json")
     if not dump_data:
         logger.warning("data field is empty, no overflow data found.")

msprobe/pytorch/common/utils.py CHANGED Viewed

@@ -18,6 +18,7 @@ import os
 import pickle
 import random
 import stat
+import inspect
 from functools import wraps
 import numpy as np
@@ -105,8 +106,49 @@ def get_rank_if_initialized():
         raise DistributedNotInitializedError("torch distributed environment is not initialized")
-def seed_all(seed=1234, mode=False):
-    check_seed_all(seed, mode)
+def remove_dropout():
+    if torch.__version__ > "1.8":
+        logger.info_on_rank_0("For precision comparison, the probability p in the dropout method is set to 0.")
+        import torch.nn.functional as F
+        from torch import _VF
+        from torch.overrides import has_torch_function_unary, handle_torch_function
+        def function_dropout(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
+                             inplace: bool = False) -> torch.Tensor:
+            if has_torch_function_unary(input_tensor):
+                return handle_torch_function(
+                    function_dropout, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
+            if p < 0.0 or p > 1.0:
+                raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
+            return _VF.dropout_(input_tensor, 0., training) if inplace else _VF.dropout(input_tensor, 0., training)
+        def function_dropout2d(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
+                               inplace: bool = False) -> torch.Tensor:
+            if has_torch_function_unary(input_tensor):
+                return handle_torch_function(
+                    function_dropout2d, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
+            if p < 0.0 or p > 1.0:
+                raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
+            return _VF.feature_dropout_(input_tensor, 0., training) if inplace else _VF.feature_dropout(input_tensor,
+                                                                                                        0., training)
+        def function_dropout3d(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
+                               inplace: bool = False) -> torch.Tensor:
+            if has_torch_function_unary(input_tensor):
+                return handle_torch_function(
+                    function_dropout3d, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
+            if p < 0.0 or p > 1.0:
+                raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
+            return _VF.feature_dropout_(input_tensor, 0., training) if inplace else _VF.feature_dropout(input_tensor,
+                                                                                                        0., training)
+        F.dropout = function_dropout
+        F.dropout2d = function_dropout2d
+        F.dropout3d = function_dropout3d
+def seed_all(seed=1234, mode=False, rm_dropout=True):
+    check_seed_all(seed, mode, rm_dropout)
     try:
         random.seed(seed)
         os.environ['PYTHONHASHSEED'] = str(seed)
@@ -126,6 +168,8 @@ def seed_all(seed=1234, mode=False):
         else:
             torch_npu.npu.manual_seed_all(seed)
             torch_npu.npu.manual_seed(seed)
+        if rm_dropout:
+            remove_dropout()
     except Exception as e:
         logger.error(f"There is an unexpected error while determinating randomness. {e}")
@@ -359,3 +403,73 @@ def load_api_data(api_data_bytes):
     except Exception as e:
         raise RuntimeError(f"load api_data from bytes failed") from e
     return buffer
+def is_recomputation():
+    """Check if the current operation is in the re-computation phase.
+    This function inspects the current call stack to indicate whether the current operation is in the
+    re-computation phase. We use a blacklist mechanism, now supported megatron and mindspeed framework.
+    megatron: The 'backward' function is called by the 'torch/autograd/function.py' file.
+    mindspeed: The 'checkpoint_function_backward' function is called by the 'torch/autograd/function.py'
+    file or the custom module(use CheckpointWithoutOutput) with the 'recompute_fn' function is executed within the
+    'torch/utils/checkpoint.py' file.
+    Returns:
+        bool: True if in the re-computation phase, False otherwise.
+    """
+    backward_function_indices = []
+    call_stack = inspect.stack()
+    # Identify the function 'backward' is being executed within the 'torch/_tensor.py' file.
+    for frame_info in call_stack:
+        if frame_info.function == "recompute_fn" and frame_info.filename.endswith('torch/utils/checkpoint.py'):
+            del call_stack
+            return True
+    # Identify indices in the call stack where the specific function is being executed
+    for idx, frame_info in enumerate(call_stack):
+        if frame_info.function == Const.BACKWARD or frame_info.function == 'checkpoint_function_backward':
+            backward_function_indices.append(idx)
+    # Check if the execution is within 'torch/autograd/function.py' file
+    for idx in backward_function_indices:
+        # The Megatron and MindSpeed L0&L1 scenes
+        if idx + 1 < len(call_stack) and call_stack[idx + 1].filename.endswith('torch/autograd/function.py'):
+            del call_stack
+            return True
+        # The latest MindSpeed L2 and ModelLink scenes
+        if idx + 2 < len(call_stack) and call_stack[idx + 2].filename.endswith('torch/autograd/function.py'):
+            del call_stack
+            return True
+    del call_stack
+    return False
+def check_save_param(variable, name, save_backward):
+    # try catch this api to skip invalid call
+    if not isinstance(variable, (list, dict, torch.Tensor, int, float, str)):
+        logger.warning("PrecisionDebugger.save variable type not valid, "
+                       "should be one of list, dict, torch.Tensor, int, float or string. "
+                       "Skip current save process.")
+        raise ValueError
+    if not isinstance(name, str):
+        logger.warning("PrecisionDebugger.save name not valid, "
+                       "should be string. "
+                       "skip current save process.")
+        raise ValueError
+    if not isinstance(save_backward, bool):
+        logger.warning("PrecisionDebugger.save_backward name not valid, "
+                       "should be bool. "
+                       "Skip current save process.")
+        raise ValueError
+def replace_last_occurrence(text, old, new):
+    if text is None:
+        return text
+    index = text.rfind(old)
+    if index != -1:
+        return text[:index] + text[index:].replace(old, new, 1)
+    return text

msprobe/pytorch/compare/distributed_compare.py CHANGED Viewed

@@ -14,52 +14,40 @@
 # limitations under the License.
 import os
-from msprobe.core.common.utils import CompareException, check_compare_param, \
-    check_configuration_param, set_dump_path, get_dump_mode
-from msprobe.core.common.file_utils import create_directory
 from msprobe.core.common.exceptions import FileCheckException
+from msprobe.core.common.file_utils import create_directory
+from msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, get_dump_mode, \
+    set_dump_path
+from msprobe.core.compare.acc_compare import ModeConfig
+from msprobe.core.compare.utils import check_and_return_dir_contents, extract_json, set_stack_json_path
 from msprobe.pytorch.common.log import logger
-from msprobe.pytorch.compare.pt_compare import PTComparator
-from msprobe.core.compare.utils import check_and_return_dir_contents, extract_json
+from msprobe.pytorch.compare.pt_compare import PTComparator, compare
 def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs):
-    if kwargs.get('suffix'):
+    if kwargs.get("suffix"):
         logger.error("Argument 'suffix' is not supported for compare_distributed.")
         raise CompareException(CompareException.INVALID_PARAM_ERROR)
-    stack_mode = kwargs.get('stack_mode', False)
-    auto_analyze = kwargs.get('auto_analyze', True)
-    fuzzy_match = kwargs.get('fuzzy_match', False)
-    is_print_compare_log = kwargs.get('is_print_compare_log', True)
+    is_print_compare_log = kwargs.get("is_print_compare_log", True)
     # get the ranks and match by order
     npu_ranks = sorted(check_and_return_dir_contents(npu_dump_dir, 'rank'))
     bench_ranks = sorted(check_and_return_dir_contents(bench_dump_dir, 'rank'))
     if len(npu_ranks) != len(bench_ranks):
-        logger.error('The number of ranks in the two runs are different. '
-                        'Unable to match the ranks. Please use another folder to compare '
-                        'or use compare() api and manually match the ranks.')
+        logger.error(
+            "The number of ranks in the two runs are different. "
+            "Unable to match the ranks. "
+            "Please use another folder to compare or use compare() api and manually match the ranks.")
         raise CompareException(CompareException.INVALID_PATH_ERROR)
     for nr, br in zip(npu_ranks, bench_ranks):
         npu_data_dir = os.path.join(npu_dump_dir, nr)
         bench_data_dir = os.path.join(bench_dump_dir, br)
         npu_path = extract_json(npu_data_dir, stack_json=False)
         bench_path = extract_json(bench_data_dir, stack_json=False)
-        stack_path = extract_json(npu_data_dir, stack_json=True)
         dump_result_param = {
-            'npu_json_path': npu_path,
-            'bench_json_path': bench_path,
-            'stack_json_path': stack_path,
-            'is_print_compare_log': is_print_compare_log
+            "npu_json_path": npu_path,
+            "bench_json_path": bench_path,
+            "is_print_compare_log": is_print_compare_log
         }
-        try:
-            set_dump_path(dump_result_param)
-            dump_mode = get_dump_mode(dump_result_param)
-            check_configuration_param(stack_mode, auto_analyze, fuzzy_match, is_print_compare_log)
-            create_directory(output_path)
-            check_compare_param(dump_result_param, output_path, dump_mode)
-        except (CompareException, FileCheckException) as error:
-            logger.error('Compare failed. Please check the arguments and do it again!')
-            raise CompareException(error.code) from error
-        pt_comparator = PTComparator()
-        pt_comparator.compare_core(dump_result_param, output_path, suffix=f'_{nr}-{br}', dump_mode=dump_mode, **kwargs)
+        compare(input_param=dump_result_param, output_path=output_path, suffix=f'_{nr}-{br}', **kwargs)

msprobe/pytorch/compare/pt_compare.py CHANGED Viewed

@@ -14,19 +14,29 @@
 # limitations under the License.
 import os.path
 import torch
 from msprobe.core.common.const import FileCheckConst
-from msprobe.pytorch.common.log import logger
 from msprobe.core.common.exceptions import FileCheckException
-from msprobe.core.compare.acc_compare import Comparator
-from msprobe.core.common.utils import check_configuration_param, check_compare_param, \
-    CompareException, set_dump_path, get_dump_mode
 from msprobe.core.common.file_utils import FileChecker, create_directory, load_yaml
+from msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, get_dump_mode, \
+    set_dump_path
+from msprobe.core.compare.acc_compare import Comparator, ModeConfig
+from msprobe.core.compare.utils import set_stack_json_path
+from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.common.utils import load_pt
-class PTComparator (Comparator):
-    def __init__(self, data_mapping=None):
+class PTComparator(Comparator):
+    def __init__(self, mode_config, data_mapping=None):
+        super().__init__(mode_config)
+        self.stack_mode = mode_config.stack_mode
+        self.auto_analyze = mode_config.auto_analyze
+        self.fuzzy_match = mode_config.fuzzy_match
+        self.dump_mode = mode_config.dump_mode
         self.frame_name = PTComparator.__name__
         self.data_mapping = data_mapping
         if isinstance(self.data_mapping, str) or self.data_mapping is None:
@@ -37,23 +47,24 @@ class PTComparator (Comparator):
             raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got "
                             f"{type(self.data_mapping)}")
-    def load_mapping_file(self, mapping_file):
+    @staticmethod
+    def load_mapping_file(mapping_file):
         if isinstance(mapping_file, str):
             mapping_dict = load_yaml(mapping_file)
         else:
             mapping_dict = {}
         return mapping_dict
     def read_npy_data(self, dir_path, file_name):
         if not file_name:
             return None
         data_path = os.path.join(dir_path, file_name)
         path_checker = FileChecker(data_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE,
-                                FileCheckConst.PT_SUFFIX, False)
+                                   FileCheckConst.PT_SUFFIX, False)
         data_path = path_checker.common_check()
         try:
-            data_value = load_pt(data_path,
-                                 to_cpu=True).detach()  # detach because numpy can not process gradient information
+            # detach because numpy can not process gradient information
+            data_value = load_pt(data_path, to_cpu=True).detach()
         except RuntimeError as e:
             # 这里捕获 load_pt 中抛出的异常
             logger.error(f"Failed to load the .pt file at {data_path}.")
@@ -65,20 +76,29 @@ class PTComparator (Comparator):
         if data_value.dtype == torch.bfloat16:
             data_value = data_value.to(torch.float32)
         data_value = data_value.numpy()
-        return data_value
-def compare(input_param, output_path, stack_mode=False, auto_analyze=True, fuzzy_match=False, **kwargs):
+        return data_value
+def compare(input_param, output_path, **kwargs):
     try:
+        auto_analyze = kwargs.get('auto_analyze', True)
+        fuzzy_match = kwargs.get('fuzzy_match', False)
+        data_mapping = kwargs.get('data_mapping', None)
+        suffix = kwargs.get('suffix', '')
         set_dump_path(input_param)
         dump_mode = get_dump_mode(input_param)
+        if "stack_json_path" in input_param:
+            stack_mode = kwargs.get('stack_mode', False)
+        else:
+            stack_mode = set_stack_json_path(input_param)  # set stack_mode and set "stack_json_path" in input_param
         check_configuration_param(stack_mode, auto_analyze, fuzzy_match, input_param.get('is_print_compare_log', True))
         create_directory(output_path)
-        check_compare_param(input_param, output_path, dump_mode)
-        data_mapping = kwargs.get('data_mapping', None)
+        check_compare_param(input_param, output_path, dump_mode, stack_mode)
     except (CompareException, FileCheckException) as error:
         logger.error('Compare failed. Please check the arguments and do it again!')
         raise CompareException(error.code) from error
-    pt_comparator = PTComparator(data_mapping)
-    pt_comparator.compare_core(input_param, output_path, stack_mode=stack_mode,
-                 auto_analyze=auto_analyze, fuzzy_match=fuzzy_match, dump_mode=dump_mode)
+    mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode)
+    pt_comparator = PTComparator(mode_config, data_mapping)
+    pt_comparator.compare_core(input_param, output_path, suffix=suffix)

msprobe/pytorch/debugger/debugger_config.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -26,7 +26,7 @@ class DebuggerConfig:
         self.task = task or common_config.task or Const.STATISTICS
         self.rank = common_config.rank if common_config.rank else []
         self.step = common_config.step if common_config.step else []
-        self.level = level or common_config.level or "L1"
+        self.level = level or common_config.level or Const.LEVEL_L1
         self.enable_dataloader = common_config.enable_dataloader
         self.scope = task_config.scope if task_config.scope else []
         self.list = task_config.list if task_config.list else []
@@ -34,10 +34,7 @@ class DebuggerConfig:
         self.summary_mode = task_config.summary_mode if task_config.summary_mode else Const.STATISTICS
         self.overflow_nums = task_config.overflow_nums if task_config.overflow_nums else 1
         self.framework = Const.PT_FRAMEWORK
-        if self.level == Const.LEVEL_L2:
-            self.is_backward_kernel_dump = False
-            self._check_and_adjust_config_with_l2()
+        self.async_dump = common_config.async_dump if common_config.async_dump else False
         if self.task == Const.FREE_BENCHMARK:
             self.fuzz_device = task_config.fuzz_device
@@ -64,6 +61,10 @@ class DebuggerConfig:
         self.check()
+        if self.level == Const.LEVEL_L2:
+            self.is_backward_kernel_dump = False
+            self._check_and_adjust_config_with_l2()
     def check_kwargs(self):
         if self.task and self.task not in Const.TASK_LIST:
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
@@ -74,29 +75,53 @@ class DebuggerConfig:
         if not self.dump_path:
             raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
                                    f"The dump_path not found.")
+        if not isinstance(self.async_dump, bool):
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The parameters async_dump should be bool.")
+        if self.async_dump and self.task == Const.TENSOR and not self.list:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   f"The parameters async_dump is true in tensor task, the parameters list cannot be "
+                                   f"empty.")
+        if self.task == Const.STRUCTURE and self.level not in [Const.LEVEL_L0, Const.LEVEL_MIX]:
+            logger.warning_on_rank_0(
+                f"When the task is set to structure, the level should be one of {[Const.LEVEL_L0, Const.LEVEL_MIX]}. "
+                f"If not, the default level is {Const.LEVEL_MIX}."
+            )
+            self.level = Const.LEVEL_MIX
     def check(self):
         self.check_kwargs()
         return True
     def check_model(self, instance, start_model):
-        if self.level not in ["L0", "mix"]:
+        if self.level not in [Const.LEVEL_L0, Const.LEVEL_MIX]:
             if instance.model is not None or start_model is not None:
-                logger.warning_on_rank_0(
+                logger.info_on_rank_0(
                     f"The current level is not L0 or mix level, so the model parameters will not be used.")
             return
-        if start_model is None:
-            if instance.model is None:
-                logger.error_on_rank_0(
-                    f"For level {self.level}, PrecisionDebugger or start interface must receive a 'model' argument.")
-                raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, f"missing the parameter 'model'")
+        if start_model is None and instance.model is None:
+            logger.error_on_rank_0(
+                f"For level {self.level}, PrecisionDebugger or start interface must receive a 'model' parameter.")
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, f"missing the parameter 'model'")
+        instance.model = start_model if start_model is not None else instance.model
+        if isinstance(instance.model, torch.nn.Module):
             return
-        if isinstance(start_model, torch.nn.Module):
-            instance.model = start_model
+        error_model = None
+        if isinstance(instance.model, (list, tuple)):
+            for model in instance.model:
+                if not isinstance(model, torch.nn.Module):
+                    error_model = model
+                    break
         else:
-            logger.error_on_rank_0(f"The 'model' parameter of start must be a torch.nn.Module type.")
+            error_model = instance.model
+        if error_model is not None:
+            error_info = (f"The 'model' parameter must be a torch.nn.Module or list[torch.nn.Module] "
+                          f"type, currently there is a {type(error_model)} type.")
             raise MsprobeException(
-                MsprobeException.INVALID_PARAM_ERROR, f"model must be a torch.nn.Module")
+                MsprobeException.INVALID_PARAM_ERROR, error_info)
     def _check_and_adjust_config_with_l2(self):
         if self.scope:

mindstudio-probe 1.1.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

mindstudio-probe 1.1.1py3-none-any.whl → 1.2.2py3-none-any.whl