PyPI - mindstudio-probe - Versions diffs - 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.2py3-none-any.whl → 8.1.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (261) hide show

{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/METADATA +4 -3
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/RECORD +243 -191
msprobe/README.md +57 -21
msprobe/core/__init__.py +17 -0
msprobe/core/common/const.py +224 -82
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +5 -3
msprobe/core/common/file_utils.py +274 -40
msprobe/core/common/framework_adapter.py +169 -0
msprobe/core/common/global_lock.py +86 -0
msprobe/core/common/runtime.py +25 -0
msprobe/core/common/utils.py +148 -72
msprobe/core/common_config.py +7 -0
msprobe/core/compare/acc_compare.py +640 -462
msprobe/core/compare/check.py +36 -107
msprobe/core/compare/compare_cli.py +4 -0
msprobe/core/compare/config.py +72 -0
msprobe/core/compare/highlight.py +217 -215
msprobe/core/compare/layer_mapping/layer_mapping.py +4 -1
msprobe/core/compare/merge_result/merge_result.py +12 -6
msprobe/core/compare/multiprocessing_compute.py +227 -107
msprobe/core/compare/npy_compare.py +32 -16
msprobe/core/compare/utils.py +218 -244
msprobe/{mindspore/runtime.py → core/config_check/__init__.py} +2 -4
msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
msprobe/core/config_check/checkers/base_checker.py +60 -0
msprobe/core/config_check/checkers/dataset_checker.py +138 -0
msprobe/core/config_check/checkers/env_args_checker.py +96 -0
msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
msprobe/core/config_check/checkers/pip_checker.py +90 -0
msprobe/core/config_check/checkers/random_checker.py +367 -0
msprobe/core/config_check/checkers/weights_checker.py +147 -0
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
msprobe/core/config_check/config_check_cli.py +51 -0
msprobe/core/config_check/config_checker.py +100 -0
msprobe/{pytorch/parse.py → core/config_check/resource/dependency.yaml} +7 -4
msprobe/core/config_check/resource/env.yaml +57 -0
msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
msprobe/core/config_check/utils/utils.py +107 -0
msprobe/core/data_dump/api_registry.py +239 -0
msprobe/core/data_dump/data_collector.py +36 -9
msprobe/core/data_dump/data_processor/base.py +74 -53
msprobe/core/data_dump/data_processor/mindspore_processor.py +119 -78
msprobe/core/data_dump/data_processor/pytorch_processor.py +134 -96
msprobe/core/data_dump/json_writer.py +146 -57
msprobe/core/debugger/precision_debugger.py +143 -0
msprobe/core/grad_probe/constant.py +2 -1
msprobe/core/grad_probe/grad_compare.py +2 -2
msprobe/core/grad_probe/utils.py +1 -1
msprobe/core/hook_manager.py +242 -0
msprobe/core/monitor/anomaly_processor.py +384 -0
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/core/service.py +356 -0
msprobe/core/single_save/__init__.py +0 -0
msprobe/core/single_save/single_comparator.py +243 -0
msprobe/core/single_save/single_saver.py +157 -0
msprobe/docs/01.installation.md +6 -5
msprobe/docs/02.config_introduction.md +89 -30
msprobe/docs/03.config_examples.md +1 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +184 -50
msprobe/docs/06.data_dump_MindSpore.md +193 -28
msprobe/docs/07.accuracy_checker_PyTorch.md +13 -3
msprobe/docs/08.accuracy_checker_online_PyTorch.md +72 -10
msprobe/docs/09.accuracy_checker_MindSpore.md +19 -7
msprobe/docs/10.accuracy_compare_PyTorch.md +266 -102
msprobe/docs/11.accuracy_compare_MindSpore.md +117 -43
msprobe/docs/12.overflow_check_PyTorch.md +5 -3
msprobe/docs/13.overflow_check_MindSpore.md +6 -4
msprobe/docs/14.data_parse_PyTorch.md +4 -10
msprobe/docs/17.grad_probe.md +2 -1
msprobe/docs/18.online_dispatch.md +3 -3
msprobe/docs/19.monitor.md +211 -103
msprobe/docs/21.visualization_PyTorch.md +100 -28
msprobe/docs/22.visualization_MindSpore.md +103 -31
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/25.tool_function_introduction.md +23 -22
msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
msprobe/docs/27.dump_json_instruction.md +278 -8
msprobe/docs/28.debugger_save_instruction.md +111 -20
msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/31.config_check.md +95 -0
msprobe/docs/32.ckpt_compare.md +69 -0
msprobe/docs/33.generate_operator_MindSpore.md +190 -0
msprobe/docs/34.RL_collect.md +92 -0
msprobe/docs/35.nan_analyze.md +72 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +3 -3
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -55
msprobe/mindspore/api_accuracy_checker/api_runner.py +25 -11
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +580 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
msprobe/mindspore/api_accuracy_checker/data_manager.py +4 -3
msprobe/mindspore/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +451 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +11 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/cell_processor.py +204 -33
msprobe/mindspore/code_mapping/graph_parser.py +4 -21
msprobe/mindspore/common/const.py +73 -2
msprobe/mindspore/common/utils.py +157 -29
msprobe/mindspore/compare/common_dir_compare.py +382 -0
msprobe/mindspore/compare/distributed_compare.py +2 -26
msprobe/mindspore/compare/ms_compare.py +18 -398
msprobe/mindspore/compare/ms_graph_compare.py +20 -10
msprobe/mindspore/compare/utils.py +37 -0
msprobe/mindspore/debugger/debugger_config.py +59 -7
msprobe/mindspore/debugger/precision_debugger.py +83 -90
msprobe/mindspore/dump/cell_dump_process.py +902 -0
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +889 -0
msprobe/mindspore/dump/dump_tool_factory.py +18 -8
msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
msprobe/mindspore/dump/hook_cell/api_register.py +176 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +22 -12
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +42 -26
msprobe/mindspore/dump/jit_dump.py +35 -27
msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -16
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +22 -12
msprobe/mindspore/free_benchmark/common/utils.py +1 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +9 -2
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/mindspore_service.py +111 -0
msprobe/mindspore/monitor/common_func.py +52 -0
msprobe/mindspore/monitor/data_writers.py +237 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
msprobe/mindspore/monitor/features.py +13 -1
msprobe/mindspore/monitor/module_hook.py +568 -444
msprobe/mindspore/monitor/optimizer_collect.py +331 -0
msprobe/mindspore/monitor/utils.py +71 -9
msprobe/mindspore/ms_config.py +16 -15
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/task_handler_factory.py +5 -2
msprobe/msprobe.py +19 -0
msprobe/nan_analyze/__init__.py +14 -0
msprobe/nan_analyze/analyzer.py +255 -0
msprobe/nan_analyze/graph.py +189 -0
msprobe/nan_analyze/utils.py +211 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +15 -13
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +206 -4
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +9 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +6 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +31 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -20
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +154 -0
msprobe/pytorch/attl_manager.py +65 -0
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
msprobe/pytorch/common/utils.py +53 -19
msprobe/pytorch/compare/distributed_compare.py +4 -36
msprobe/pytorch/compare/pt_compare.py +13 -84
msprobe/pytorch/compare/utils.py +47 -0
msprobe/pytorch/debugger/debugger_config.py +34 -17
msprobe/pytorch/debugger/precision_debugger.py +50 -96
msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
msprobe/pytorch/dump/module_dump/module_dump.py +15 -61
msprobe/pytorch/dump/module_dump/module_processer.py +150 -114
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +1 -1
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
msprobe/pytorch/hook_module/api_register.py +155 -0
msprobe/pytorch/hook_module/hook_module.py +18 -22
msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +193 -75
msprobe/pytorch/hook_module/utils.py +28 -2
msprobe/pytorch/monitor/csv2tb.py +14 -4
msprobe/pytorch/monitor/data_writers.py +259 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +336 -241
msprobe/pytorch/monitor/module_metric.py +17 -0
msprobe/pytorch/monitor/optimizer_collect.py +244 -224
msprobe/pytorch/monitor/utils.py +84 -4
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +13 -2
msprobe/pytorch/online_dispatch/dump_compare.py +8 -2
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +5 -4
msprobe/pytorch/pt_config.py +16 -11
msprobe/pytorch/pytorch_service.py +70 -0
msprobe/visualization/builder/graph_builder.py +69 -10
msprobe/visualization/builder/msprobe_adapter.py +24 -12
msprobe/visualization/compare/graph_comparator.py +63 -51
msprobe/visualization/compare/mode_adapter.py +22 -20
msprobe/visualization/graph/base_node.py +11 -4
msprobe/visualization/graph/distributed_analyzer.py +1 -10
msprobe/visualization/graph/graph.py +2 -13
msprobe/visualization/graph/node_op.py +1 -2
msprobe/visualization/graph_service.py +251 -104
msprobe/visualization/utils.py +26 -44
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -140
msprobe/mindspore/monitor/anomaly_detect.py +0 -404
msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
msprobe/mindspore/service.py +0 -543
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
msprobe/pytorch/monitor/anomaly_detect.py +0 -410
msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
msprobe/pytorch/service.py +0 -470
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
/msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
/msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0

msprobe/core/data_dump/data_processor/pytorch_processor.py CHANGED Viewed

@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import hashlib
 import zlib
 from dataclasses import asdict
 from typing import List
@@ -24,14 +23,15 @@ from torch import distributed as dist
 from torch.distributed.distributed_c10d import _get_default_group
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.file_utils import path_len_exceeds_limit
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import convert_tuple
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
     ModuleForwardInputsOutputs, TensorStatInfo
-from msprobe.pytorch.common.utils import save_pt, load_pt
+from msprobe.pytorch.common.utils import Const as PtConst, save_pt, is_hifloat8_tensor, is_float8_tensor
 from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
-from msprobe.core.common.utils import recursion_depth_decorator
 is_gpu = False
 try:
@@ -78,14 +78,16 @@ class PytorchDataProcessor(BaseDataProcessor):
     def analyze_device_in_kwargs(element):
         single_arg = {}
         single_arg.update({'type': "torch.device"})
-        if not isinstance(element, str):
+        if isinstance(element, (int, str)):
+            single_arg.update({"value": element})
+        elif isinstance(element, torch.device):
             if hasattr(element, "index"):
                 device_value = element.type + ":" + str(element.index)
             else:
                 device_value = element.type
             single_arg.update({"value": device_value})
         else:
-            single_arg.update({"value": element})
+            logger.debug(f"Device type {type(element)} is not supported.")
         return single_arg
     @staticmethod
@@ -99,19 +101,17 @@ class PytorchDataProcessor(BaseDataProcessor):
             logger.warning("Async dump do not support complex data!")
             return tensor_stat
         elif data.dtype == torch.bool:
-            tensor_stat.stack_tensor_stat = (["Max", "Min"], torch.stack(
-                [torch.any(data), torch.all(data)]))
+            tensor_stat.max = torch.any(data)
+            tensor_stat.min = torch.all(data)
         elif not data.shape:
-            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], torch.stack([data, data, data, data]))
+            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data
         else:
-            if not data.is_floating_point() or data.dtype == torch.float64:
+            if data.dtype == torch.float64 or not data.is_floating_point():
                 data = data.float()
-            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], torch.stack([
-                torch.max(data),
-                torch.min(data),
-                torch.mean(data),
-                torch.norm(data)
-            ]))
+            tensor_stat.max = torch.max(data)
+            tensor_stat.min = torch.min(data)
+            tensor_stat.mean = torch.mean(data)
+            tensor_stat.norm = torch.norm(data)
         return tensor_stat
     @staticmethod
@@ -124,17 +124,17 @@ class PytorchDataProcessor(BaseDataProcessor):
             tensor_stat.min = np.min(data_abs).item()
             tensor_stat.mean = np.mean(data_abs).item()
         elif data.dtype == torch.bool:
-            tensor_stat.max = torch.any(data).item()
-            tensor_stat.min = torch.all(data).item()
+            tensor_stat.max = torch.any(data)
+            tensor_stat.min = torch.all(data)
         elif not data.shape:
-            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item()
+            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data
         else:
-            if not data.is_floating_point() or data.dtype == torch.float64:
+            if data.dtype == torch.float64 or not data.is_floating_point():
                 data = data.float()
-            tensor_stat.max = torch.max(data).item()
-            tensor_stat.min = torch.min(data).item()
-            tensor_stat.mean = torch.mean(data).item()
-            tensor_stat.norm = torch.norm(data).item()
+            tensor_stat.max = torch.max(data)
+            tensor_stat.min = torch.min(data)
+            tensor_stat.mean = torch.mean(data)
+            tensor_stat.norm = torch.norm(data)
         return tensor_stat
     @staticmethod
@@ -143,7 +143,7 @@ class PytorchDataProcessor(BaseDataProcessor):
         if data.is_meta:
             return tensor_stat
         data_clone = data.detach()
-        if data_clone.numel() == 0:
+        if not data_clone.numel() or not data_clone.data_ptr():
             return tensor_stat
         else:
             if data_clone.device.type == Const.CPU_LOWERCASE or not async_dump:
@@ -171,12 +171,8 @@ class PytorchDataProcessor(BaseDataProcessor):
     @staticmethod
     def process_group_hash(arg):
         group_ranks = dist.get_process_group_ranks(arg)
-        group_ranks_hash = hashlib.md5(str(group_ranks).encode('utf-8')).hexdigest()
-        return group_ranks_hash
-    @staticmethod
-    def is_distributed_op(module):
-        return getattr(module, "op_is_distributed", False)
+        group_ranks_hash = zlib.crc32(str(group_ranks).encode('utf-8'))
+        return f"{group_ranks_hash:08x}"
     @staticmethod
     def is_hookable_element(element):
@@ -214,43 +210,52 @@ class PytorchDataProcessor(BaseDataProcessor):
             logger.warning(f"Failed to get value of torch.distributed.ReduceOp with error info: {e}.")
         return {"type": "torch.distributed.ReduceOp", "value": op_type}
+    @staticmethod
+    def _cast_to_float_if_fp8(tensor):
+        dtype = str(tensor.dtype)
+        if is_float8_tensor(tensor):
+            dtype = PtConst.HIFLOAT8_TYPE if is_hifloat8_tensor(tensor) else dtype
+            logger.debug(
+                f"The {dtype} tensor analyzing/saving is unsupported in dump function."
+                f"Casting to float for processing."
+            )
+            tensor = tensor.float()
+        return tensor, dtype
     @classmethod
     def get_special_types(cls):
         return super().get_special_types() + cls.pytorch_special_type
+    def dump_async_data(self):
+        for file_path, tensor in self._async_dump_cache.items():
+            save_pt(tensor.contiguous(), file_path)
+        self._async_dump_cache.clear()
     def analyze_single_element(self, element, suffix_stack):
         if suffix_stack and suffix_stack[-1] in self.torch_object_key:
             return self.torch_object_key[suffix_stack[-1]](element)
-        if isinstance(element, torch.Size):
-            return self._analyze_torch_size(element)
-        if isinstance(element, torch.memory_format):
-            return self._analyze_memory_format(element)
-        if isinstance(element, dist.ProcessGroup):
-            return self._analyze_process_group(element)
-        if isinstance(element, dist.P2POp):
-            return self._analyze_p2pop(element)
-        if isinstance(element, dist.ReduceOp):
-            return self._analyze_reduce_op(element)
-        converted_numpy, numpy_type = self._convert_numpy_to_builtin(element)
-        if converted_numpy is not element:
-            return {"type": numpy_type, "value": converted_numpy}
-        if isinstance(element, torch.Tensor):
-            return self._analyze_tensor(element, Const.SEP.join([str(suffix) for suffix in suffix_stack]))
-        if isinstance(element, np.ndarray):
-            return self._analyze_numpy(element, Const.SEP.join([str(suffix) for suffix in suffix_stack]))
-        if isinstance(element, (bool, int, float, str, slice, type(Ellipsis))):
-            return self._analyze_builtin(element)
-        return {}
-    def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
-        if self.is_distributed_op(module):
-            module_input_output.update_output_with_args_and_kwargs()
-        return super().analyze_forward_output(name, module, module_input_output)
+        suffix_str = Const.SEP.join(str(s) for s in suffix_stack)
+        type_analyzer = [
+            (PytorchDataProcessor.builtin_type, self._analyze_builtin),
+            (torch.Size, self._analyze_torch_size),
+            (torch.Tensor, lambda e: self._analyze_tensor(e, suffix_str)),
+            (torch.memory_format, self._analyze_memory_format),
+            (dist.ProcessGroup, self._analyze_process_group),
+            (dist.P2POp, lambda e: self._analyze_p2pop(e, suffix_str)),
+            (dist.ReduceOp, self._analyze_reduce_op),
+            (PytorchDataProcessor.np_type[:-1], self._analyze_numpy),
+            (np.ndarray, lambda e: self._analyze_ndarray(e, suffix_str)),
+        ]
+        for type_key, analyze_fn in type_analyzer:
+            if isinstance(element, type_key):
+                return analyze_fn(element)
+        return {}
-    def _analyze_p2pop(self, arg):
+    def _analyze_p2pop(self, arg, suffix):
         p2pop_info = {"class_type": "torch.distributed.P2POp"}
         try:
-            tensor_info = self._analyze_tensor(arg.tensor, [])
+            tensor_info = self._analyze_tensor(arg.tensor, suffix)
             p2pop_info.update({"tensor": tensor_info})
             p2pop_info.update({"op": arg.op.__name__})
             p2pop_info.update({"peer": arg.peer})
@@ -263,63 +268,71 @@ class PytorchDataProcessor(BaseDataProcessor):
         return p2pop_info
     def _analyze_tensor(self, tensor, suffix):
+        tensor, dtype = self._cast_to_float_if_fp8(tensor)
         tensor_stat = self.get_stat_info(tensor, self.config.async_dump)
         tensor_json = {}
         tensor_json.update({'type': 'torch.Tensor'})
-        tensor_json.update({'dtype': str(tensor.dtype)})
+        tensor_json.update({'dtype': dtype})
         tensor_json.update({"shape": tensor.shape})
-        if tensor_stat.stack_tensor_stat is None:
-            tensor_json.update({"Max": tensor_stat.max})
-            tensor_json.update({"Min": tensor_stat.min})
-            tensor_json.update({"Mean": tensor_stat.mean})
-            tensor_json.update({"Norm": tensor_stat.norm})
-            tensor_json.update({"requires_grad": tensor.requires_grad})
-            if tensor_stat.max is not None:
-                if np.isinf(tensor_stat.max) or np.isnan(tensor_stat.max):
-                    tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "max")
-            if tensor_stat.min is not None:
-                if np.isinf(tensor_stat.min) or np.isnan(tensor_stat.min):
-                    tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "min")
-        else:
-            tensor_json.update({"requires_grad": tensor.requires_grad})
-            tensor_json.update({"tensor_stat": tensor_stat.stack_tensor_stat})
+        stat_values = [
+            tensor_stat.max,
+            tensor_stat.min,
+            tensor_stat.mean,
+            tensor_stat.norm
+        ]
+        placeholder_index = self.data_writer.append_stat_to_buffer(stat_values)
+        tensor_json.update({Const.TENSOR_STAT_INDEX: placeholder_index})
+        tensor_json.update({"requires_grad": tensor.requires_grad})
         if self.config.summary_mode == Const.MD5 and not self.config.async_dump:
             tensor_md5 = self.get_md5_for_tensor(tensor)
             tensor_json.update({Const.MD5: tensor_md5})
         return tensor_json
-class StatisticsDataProcessor(PytorchDataProcessor):
-    pass
-class TensorDataProcessor(PytorchDataProcessor):
-    def dump_async_data(self):
-        for file_path, tensor in self._async_dump_cache.items():
-            save_pt(tensor.contiguous(), file_path)
-        self._async_dump_cache.clear()
-    def _analyze_tensor(self, tensor, suffix):
+    def _analyze_and_save_tensor(self, tensor, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
-        single_arg = super()._analyze_tensor(tensor, suffix)
+        single_arg = PytorchDataProcessor._analyze_tensor(self, tensor, suffix)
         single_arg.update({"data_name": dump_data_name})
+        tensor, _ = self._cast_to_float_if_fp8(tensor)
         if self.config.async_dump:
             self._async_dump_cache[file_path] = tensor.clone().detach()
         else:
             saved_tensor = tensor.clone().contiguous().detach()
             save_pt(saved_tensor, file_path)
         return single_arg
-    def _analyze_numpy(self, ndarray, suffix):
+    def _analyze_and_save_ndarray(self, ndarray, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
         save_pt(torch.tensor(ndarray), file_path)
-        ndarray_json = super()._analyze_numpy(ndarray, suffix)
+        ndarray_json = PytorchDataProcessor._analyze_ndarray(ndarray, suffix)
         ndarray_json.update({"data_name": dump_data_name})
         return ndarray_json
+class StatisticsDataProcessor(PytorchDataProcessor):
+    def _analyze_tensor(self, tensor, suffix):
+        if any(item in self.current_api_or_module_name for item in self.config.tensor_list):
+            return self._analyze_and_save_tensor(tensor, suffix)
+        else:
+            return super()._analyze_tensor(tensor, suffix)
+    def _analyze_ndarray(self, ndarray, suffix):
+        if any(item in self.current_api_or_module_name for item in self.config.tensor_list):
+            return self._analyze_and_save_ndarray(ndarray, suffix)
+        else:
+            return super()._analyze_ndarray(ndarray, suffix)
+class TensorDataProcessor(PytorchDataProcessor):
+    def _analyze_tensor(self, tensor, suffix):
+        return self._analyze_and_save_tensor(tensor, suffix)
+    def _analyze_ndarray(self, ndarray, suffix):
+        return self._analyze_and_save_ndarray(ndarray, suffix)
 class OverflowCheckDataProcessor(PytorchDataProcessor):
     __slots__ = ["cached_tensors_and_file_paths"]
@@ -383,7 +396,8 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
             self._analyze_maybe_overflow_flag()
         if self.has_overflow:
             for file_path, tensor in self.cached_tensors_and_file_paths.items():
-                save_pt(tensor, file_path)
+                tensor, _ = self._cast_to_float_if_fp8(tensor)
+                save_pt(tensor.clone().contiguous().detach(), file_path)
             self.real_overflow_nums += 1
             if self.overflow_nums != -1 and self.real_overflow_nums >= self.overflow_nums:
                 logger.info(f"[{Const.TOOL_NAME}] Reached the preset overflow times, "
@@ -409,10 +423,22 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
             raise RuntimeError(f"overflow check failed") from e
     def _analyze_maybe_overflow_tensor(self, tensor_json):
-        if tensor_json['Max'] is None or tensor_json['Min'] is None:
+        tensor_stat_index = tensor_json.get(Const.TENSOR_STAT_INDEX)
+        if tensor_stat_index is None:
+            logger.warning("tensor_stat_index does not exist in tensor_json.")
+            return
+        max_tensor = self.data_writer.get_buffer_values_max(tensor_stat_index)
+        min_tensor = self.data_writer.get_buffer_values_min(tensor_stat_index)
+        if max_tensor is None or min_tensor is None:
+            return
+        if torch.isinf(max_tensor) or torch.isnan(max_tensor):
+            self.has_overflow = True
             return
-        self.has_overflow = np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']) or \
-                            np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min'])
+        if torch.isinf(min_tensor) or torch.isnan(min_tensor):
+            self.has_overflow = True
     def _analyze_tensor(self, tensor, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
@@ -508,11 +534,13 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
             return
         if self.config.is_backward_kernel_dump:
-            self.forward_args = self.clone_and_detach_tensor(module_input_output.args)
-            self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs)
             try:
+                self.forward_args = self.clone_and_detach_tensor(module_input_output.args)
+                self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs)
                 output = module.forward(*self.forward_args, **self.forward_kwargs)
-            except Exception:
+            except Exception as e:
+                if isinstance(e, MsprobeException):
+                    logger.warning(str(e))
                 self._print_unsupported_log(name)
                 self.enable_kernel_dump = False
                 return
@@ -554,9 +582,17 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
         self.stop_kernel_dump()
         logger.info(f"The kernel data of {name} is dumped successfully.")
-    @recursion_depth_decorator("KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor")
+    @recursion_depth_decorator(
+        "KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor",
+        max_depth=Const.DUMP_MAX_DEPTH
+    )
     def clone_and_detach_tensor(self, input_params):
         if isinstance(input_params, torch.Tensor):
+            if is_float8_tensor(input_params):
+                raise MsprobeException(
+                    MsprobeException.UNSUPPORTED_TYPE_ERROR,
+                    f"L2 backward dump does not support float8 type."
+                )
             if input_params.requires_grad:
                 return input_params.clone().detach().requires_grad_()
             return input_params.clone()
@@ -571,6 +607,8 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
     def analyze_single_element(self, element, suffix_stack):
         if isinstance(element, torch.Tensor):
+            if is_float8_tensor(element):
+                return {}
             if not self.is_found_output_tensor:
                 if element.requires_grad:
                     self.forward_output_tensor = element

msprobe/core/data_dump/json_writer.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -16,12 +16,14 @@
 import csv
 import os
 import copy
-import numpy as np
+import threading
 from msprobe.core.common.const import Const, FileCheckConst
 from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json
 from msprobe.core.common.log import logger
-from msprobe.core.common.exceptions import MsprobeException
+from msprobe.core.common.decorator import recursion_depth_decorator
+lock = threading.Lock()
 class DataWriter:
@@ -34,10 +36,12 @@ class DataWriter:
         self.dump_tensor_data_dir = None
         self.debug_file_path = None
         self.flush_size = 1000
+        self.larger_flush_size = 20000
         self.cache_data = {}
         self.cache_stack = {}
         self.cache_construct = {}
         self.cache_debug = {}
+        self.stat_stack_list = []
     @staticmethod
     def write_data_to_csv(result: list, result_header: tuple, file_path: str):
@@ -54,13 +58,54 @@ class DataWriter:
         if is_new_file:
             change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY)
+    @recursion_depth_decorator("JsonWriter: DataWriter._replace_stat_placeholders")
+    def _replace_stat_placeholders(self, data, stat_result):
+        if isinstance(data, dict):
+            keys = list(data.keys())  # 获取当前所有键
+            for key in keys:  # 递归所有变量
+                value = data[key]
+                if key == Const.TENSOR_STAT_INDEX and isinstance(value, int):
+                    if value >= 0:
+                        idx = value
+                    else:
+                        return
+                    stat_values = stat_result[idx] if idx < len(stat_result) else [None] * 4
+                    new_entries = {
+                        Const.TYPE: data["type"],
+                        Const.DTYPE: data["dtype"],
+                        Const.SHAPE: data["shape"],
+                        Const.MAX: stat_values[0],
+                        Const.MIN: stat_values[1],
+                        Const.MEAN: stat_values[2],
+                        Const.NORM: stat_values[3],
+                    }
+                    del data[key]
+                    # 重构字典顺序
+                    updated_dict = {}
+                    # 通过插入排序后字段保证字段写入json的有序
+                    updated_dict.update(new_entries)
+                    # 遍历原字典其他字段（排除已删除的tensor_stat_index）
+                    for k in data:
+                        if k not in new_entries:
+                            updated_dict[k] = data[k]
+                    data.clear()
+                    data.update(updated_dict)
+                else:
+                    self._replace_stat_placeholders(value, stat_result)
+        elif isinstance(data, (list, tuple)):
+            for item in data:
+                self._replace_stat_placeholders(item, stat_result)
     def reset_cache(self):
         self.cache_data = {}
         self.cache_stack = {}
         self.cache_construct = {}
+        self.cache_debug = {}
     def initialize_json_file(self, **kwargs):
-        if self.debug_file_path and not self.cache_debug:
+        if kwargs["level"] == Const.LEVEL_DEBUG and not self.cache_debug:
             # debug level case only create debug.json
             debug_dict = copy.deepcopy(kwargs)
             debug_dict.update({"dump_data_dir": self.dump_tensor_data_dir, Const.DATA: {}})
@@ -86,39 +131,59 @@ class DataWriter:
     def flush_data_periodically(self):
         dump_data = self.cache_data.get(Const.DATA)
-        if dump_data and isinstance(dump_data, dict) and len(dump_data) % self.flush_size == 0:
-            self.write_json()
-    def update_data(self, new_data):
-        if not isinstance(new_data, dict) or len(new_data.keys()) != 1:
-            logger.warning(f"The data info({new_data}) should be a dict with only one outer key.")
-            return
-        dump_data = self.cache_data.get(Const.DATA)
-        if not isinstance(dump_data, dict):
-            logger.warning(f"The dump data({dump_data}) should be a dict.")
+        if not dump_data or not isinstance(dump_data, dict):
             return
-        key = next(iter(new_data.keys()))
-        if key in dump_data:
-            dump_data.get(key).update(new_data.get(key))
-        else:
-            dump_data.update(new_data)
+        length = len(dump_data)
-    def update_stack(self, new_data):
-        self.cache_stack.update(new_data)
+        threshold = self.flush_size if length < self.larger_flush_size else self.larger_flush_size
+        if length % threshold == 0:
+            self.write_json()
+    def update_data(self, new_data):
+        with lock:
+            if not isinstance(new_data, dict) or len(new_data.keys()) != 1:
+                logger.warning(f"The data info({new_data}) should be a dict with only one outer key.")
+                return
+            dump_data = self.cache_data.get(Const.DATA)
+            if not isinstance(dump_data, dict):
+                logger.warning(f"The dump data({dump_data}) should be a dict.")
+                return
+            key = next(iter(new_data.keys()))
+            if key in dump_data:
+                dump_data.get(key).update(new_data.get(key))
+            else:
+                dump_data.update(new_data)
+    def update_stack(self, name, stack_data):
+        with lock:
+            api_list = self.cache_stack.get(stack_data)
+            if api_list is None:
+                self.cache_stack.update({stack_data: [name]})
+            else:
+                api_list.append(name)
     def update_construct(self, new_data):
-        self.cache_construct.update(new_data)
+        with lock:
+            self.cache_construct.update(new_data)
     def update_debug(self, new_data):
-        self.cache_debug['data'].update(new_data)
+        with lock:
+            self.cache_debug['data'].update(new_data)
     def write_data_json(self, file_path):
         logger.info(f"dump.json is at {os.path.dirname(os.path.dirname(file_path))}. ")
         save_json(file_path, self.cache_data, indent=1)
     def write_stack_info_json(self, file_path):
-        save_json(file_path, self.cache_stack, indent=1)
+        num, new_cache_stack = 0, {}
+        for key, value in self.cache_stack.items():
+            new_cache_stack[num] = [value, key]
+            num += 1
+        save_json(file_path, new_cache_stack, indent=1)
     def write_construct_info_json(self, file_path):
         save_json(file_path, self.cache_construct, indent=1)
@@ -126,38 +191,62 @@ class DataWriter:
     def write_debug_info_json(self, file_path):
         save_json(file_path, self.cache_debug, indent=1)
+    def append_stat_to_buffer(self, stat_vector):
+        """
+        直接使用 Python list 存储 stat_vector,
+        将 stat_vector 存入 self.stat_stack_list 的方式
+        """
+        self.stat_stack_list.append(stat_vector)
+        return len(self.stat_stack_list) - 1
+    def get_buffer_values_max(self, index):
+        if 0 <= index < len(self.stat_stack_list) and len(self.stat_stack_list[index]) >= 1:
+            return self.stat_stack_list[index][0]
+        else:
+            logger.warning(f"stat_stack_list[{index}] The internal data is incomplete,"
+                           f" and the maximum value cannot be obtained.")
+            return None
+    def get_buffer_values_min(self, index):
+        if 0 <= index < len(self.stat_stack_list) and len(self.stat_stack_list[index]) >= 1:
+            return self.stat_stack_list[index][1]
+        else:
+            logger.warning(f"stat_stack_list[{index}] Internal data is incomplete"
+                           f" and minimum values cannot be obtained.")
+            return None
+    def flush_stat_stack(self):
+        """
+        在 flush 阶段，将所有存储的统计值从设备搬到 CPU，
+        这里返回一个列表，每个元素是 [Max, Min, Mean, Norm] 的数值列表
+        """
+        if not self.stat_stack_list:
+            return []
+        result = [
+            [
+                x.item() if hasattr(x, "item") else x
+                for x in stat_values
+            ]
+            for stat_values in self.stat_stack_list
+        ]
+        self.stat_stack_list = []
+        return result
     def write_json(self):
-        if self.cache_data:
-            self.write_data_json(self.dump_file_path)
-        if self.cache_stack:
-            self.write_stack_info_json(self.stack_file_path)
-        if self.cache_construct:
-            self.write_construct_info_json(self.construct_file_path)
-        if self.cache_debug:
-            self.write_debug_info_json(self.debug_file_path)
-    def fill_stack_tensor_data(self):
-        self.process_stat_data_recursive(self.cache_data)
-    def process_stat_data_recursive(self, data, depth=0):
-        if depth > Const.MAX_DEPTH:
-            logger.error(f"The maximum depth of recursive process stat data, {Const.MAX_DEPTH} is reached.")
-            raise MsprobeException(MsprobeException.RECURSION_LIMIT_ERROR)
-        if isinstance(data, dict):
-            if "tensor_stat" in data.keys():
-                tensor_stat = data["tensor_stat"]
-                if len(tensor_stat) != Const.TENSOR_STAT_LEN or len(tensor_stat[0]) != len(tensor_stat[1]):
-                    logger.warning("Some bad data in async dump")
-                else:
-                    tensor_stat_index, tensor_stat_data = tensor_stat[0], tensor_stat[1]
-                    if hasattr(tensor_stat_data, "device") and tensor_stat_data.device != Const.CPU_LOWERCASE:
-                        tensor_stat_data = tensor_stat_data.cpu()
-                    for index, stat in zip(tensor_stat_index, tensor_stat_data):
-                        data.update({index: stat.item()})
-                del data["tensor_stat"]
-            else:
-                for key in data.keys():
-                    self.process_stat_data_recursive(data[key], depth + 1)
-        elif isinstance(data, (list, tuple)):
-            for i in data:
-                self.process_stat_data_recursive(i, depth + 1)
+        with lock:
+            # 在写 JSON 前，统一获取统计值
+            stat_result = self.flush_stat_stack()
+            # 遍历 cache_data，将占位符替换为最终统计值
+            if stat_result:
+                self._replace_stat_placeholders(self.cache_data, stat_result)
+                if self.cache_debug:
+                    self._replace_stat_placeholders(self.cache_debug, stat_result)
+            if self.cache_data:
+                self.write_data_json(self.dump_file_path)
+            if self.cache_stack:
+                self.write_stack_info_json(self.stack_file_path)
+            if self.cache_construct:
+                self.write_construct_info_json(self.construct_file_path)
+            if self.cache_debug:
+                self.write_debug_info_json(self.debug_file_path)

mindstudio-probe 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl

mindstudio-probe 1.2.2py3-none-any.whl → 8.1.0py3-none-any.whl