PyPI - mindstudio-probe - Versions diffs - 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl - Mend

mindstudio-probe 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (181) hide show

{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/METADATA +2 -2
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/RECORD +172 -147
msprobe/README.md +6 -6
msprobe/core/common/const.py +98 -41
msprobe/core/common/db_manager.py +256 -0
msprobe/core/common/file_utils.py +28 -5
msprobe/core/common/log.py +7 -0
msprobe/core/common/megatron_utils.py +59 -0
msprobe/core/common/parallel_state.py +193 -0
msprobe/core/common/utils.py +20 -13
msprobe/core/common_config.py +5 -0
msprobe/core/compare/acc_compare.py +140 -93
msprobe/core/compare/check.py +13 -0
msprobe/core/compare/compare_cli.py +64 -6
msprobe/core/compare/config.py +10 -8
msprobe/core/compare/diff_analyze/diff_analyze_threshold.yaml +14 -0
msprobe/core/compare/diff_analyze/first_diff_analyze.py +135 -0
msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
msprobe/core/compare/find_first/__init__.py +0 -0
msprobe/core/compare/find_first/analyzer.py +282 -0
msprobe/core/compare/find_first/data_processor.py +35 -0
msprobe/core/compare/find_first/graph.py +188 -0
msprobe/core/compare/find_first/utils.py +189 -0
msprobe/core/compare/highlight.py +74 -101
msprobe/core/compare/layer_mapping/layer_mapping.py +14 -9
msprobe/core/compare/merge_result/merge_result.py +2 -2
msprobe/core/compare/multiprocessing_compute.py +45 -28
msprobe/core/compare/npy_compare.py +7 -10
msprobe/core/compare/utils.py +338 -130
msprobe/core/config_check/checkers/dataset_checker.py +2 -1
msprobe/core/config_check/checkers/env_args_checker.py +5 -5
msprobe/core/config_check/checkers/hyperparameter_checker.py +30 -10
msprobe/core/config_check/checkers/pip_checker.py +4 -3
msprobe/core/config_check/checkers/random_checker.py +3 -3
msprobe/core/config_check/checkers/weights_checker.py +2 -1
msprobe/core/config_check/ckpt_compare/megatron_loader.py +2 -0
msprobe/core/config_check/resource/hyperparameter.yaml +11 -1
msprobe/core/config_check/utils/hyperparameter_parser.py +7 -3
msprobe/core/config_check/utils/utils.py +10 -0
msprobe/core/data_dump/api_registry.py +49 -30
msprobe/core/data_dump/data_collector.py +71 -29
msprobe/core/data_dump/data_processor/base.py +2 -0
msprobe/core/data_dump/data_processor/mindspore_processor.py +47 -53
msprobe/core/data_dump/data_processor/pytorch_processor.py +227 -93
msprobe/core/data_dump/json_writer.py +81 -7
msprobe/core/data_dump/scope.py +4 -6
msprobe/core/hook_manager.py +129 -70
msprobe/core/monitor/csv2db.py +361 -0
msprobe/core/monitor/db_utils.py +278 -0
msprobe/core/monitor/utils.py +35 -1
msprobe/core/service.py +31 -39
msprobe/core/single_save/single_comparator.py +16 -3
msprobe/docs/01.installation.md +51 -19
msprobe/docs/02.config_introduction.md +16 -20
msprobe/docs/03.config_examples.md +26 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +6 -2
msprobe/docs/06.data_dump_MindSpore.md +44 -7
msprobe/docs/07.accuracy_checker_PyTorch.md +1 -1
msprobe/docs/10.accuracy_compare_PyTorch.md +124 -44
msprobe/docs/11.accuracy_compare_MindSpore.md +75 -7
msprobe/docs/14.data_parse_PyTorch.md +1 -1
msprobe/docs/19.monitor.md +94 -7
msprobe/docs/21.visualization_PyTorch.md +71 -101
msprobe/docs/22.visualization_MindSpore.md +69 -119
msprobe/docs/23.generate_operator_PyTorch.md +1 -1
msprobe/docs/25.tool_function_introduction.md +0 -1
msprobe/docs/26.data_dump_PyTorch_baseline.md +7 -7
msprobe/docs/28.debugger_save_instruction.md +184 -81
msprobe/docs/29.data_dump_MSAdapter.md +6 -0
msprobe/docs/31.config_check.md +4 -2
msprobe/docs/36.calculation_result_change.md +75 -0
msprobe/docs/FAQ.md +22 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +6 -2
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +59 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +80 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +330 -0
msprobe/mindspore/__init__.py +1 -1
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +1 -1
msprobe/mindspore/api_accuracy_checker/api_runner.py +9 -6
msprobe/mindspore/api_accuracy_checker/compute_element.py +18 -12
msprobe/mindspore/cell_processor.py +64 -25
msprobe/mindspore/common/utils.py +51 -7
msprobe/mindspore/compare/common_dir_compare.py +45 -37
msprobe/mindspore/compare/ms_compare.py +10 -2
msprobe/mindspore/compare/ms_graph_compare.py +47 -52
msprobe/mindspore/debugger/debugger_config.py +18 -7
msprobe/mindspore/debugger/precision_debugger.py +16 -12
msprobe/mindspore/dump/cell_dump_process.py +130 -68
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +10 -2
msprobe/mindspore/dump/graph_mode_cell_dump.py +35 -9
msprobe/mindspore/dump/graph_tensor_dump.py +11 -0
msprobe/mindspore/dump/hook_cell/api_register.py +19 -20
msprobe/mindspore/dump/hook_cell/hook_cell.py +12 -34
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +142 -21
msprobe/mindspore/dump/kernel_kbyk_dump.py +24 -0
msprobe/mindspore/exception_dump/__init__.py +0 -0
msprobe/mindspore/exception_dump/exception_dump_tool_factory.py +51 -0
msprobe/mindspore/exception_dump/kernel_graph_exception_dump.py +57 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +5 -4
msprobe/mindspore/mindspore_service.py +2 -2
msprobe/mindspore/mindtorch/mindtorch_adaptor.py +12 -7
msprobe/mindspore/monitor/features.py +82 -0
msprobe/mindspore/monitor/module_hook.py +168 -10
msprobe/mindspore/monitor/utils.py +27 -1
msprobe/mindspore/ms_config.py +12 -4
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +1 -1
msprobe/mindspore/task_handler_factory.py +3 -1
msprobe/nan_analyze/graph.py +1 -1
msprobe/pytorch/api_accuracy_checker/common/config.py +3 -36
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +0 -24
msprobe/pytorch/api_accuracy_checker/compare/compare.py +2 -12
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -6
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -2
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +12 -132
msprobe/pytorch/common/utils.py +1 -21
msprobe/pytorch/compare/pt_compare.py +10 -2
msprobe/pytorch/{hook_module/jit_script_wrapper.py → compare/pt_diff_analyze.py} +3 -15
msprobe/pytorch/compare/utils.py +2 -1
msprobe/pytorch/debugger/debugger_config.py +18 -23
msprobe/pytorch/dump/module_dump/hook_wrapper.py +10 -7
msprobe/pytorch/dump/module_dump/module_processer.py +41 -19
msprobe/pytorch/free_benchmark/main.py +7 -4
msprobe/pytorch/hook_module/api_register.py +62 -24
msprobe/pytorch/hook_module/hook_module.py +9 -29
msprobe/pytorch/hook_module/pt_hook_manager.py +84 -15
msprobe/pytorch/hook_module/script_wrapper.py +140 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +6 -0
msprobe/pytorch/monitor/csv2tb.py +1 -1
msprobe/pytorch/monitor/features.py +94 -0
msprobe/pytorch/monitor/module_hook.py +221 -81
msprobe/pytorch/monitor/module_metric.py +27 -1
msprobe/pytorch/monitor/optimizer_collect.py +109 -4
msprobe/pytorch/online_dispatch/dispatch.py +42 -24
msprobe/pytorch/online_dispatch/dump_compare.py +1 -1
msprobe/pytorch/parse_tool/lib/visualization.py +0 -1
msprobe/pytorch/pt_config.py +2 -51
msprobe/pytorch/pytorch_service.py +7 -14
msprobe/visualization/builder/graph_builder.py +192 -63
msprobe/visualization/builder/graph_merger.py +986 -0
msprobe/visualization/builder/msprobe_adapter.py +17 -15
msprobe/visualization/compare/graph_comparator.py +26 -16
msprobe/visualization/db_utils.py +252 -0
msprobe/visualization/graph/base_node.py +2 -22
msprobe/visualization/graph/distributed_analyzer.py +12 -12
msprobe/visualization/graph/graph.py +44 -16
msprobe/visualization/graph_service.py +143 -59
msprobe/visualization/utils.py +103 -4
msprobe/docs/08.accuracy_checker_online_PyTorch.md +0 -295
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +0 -205
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +0 -378
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +0 -239
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +0 -115
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +0 -250
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +0 -63
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +0 -198
msprobe/pytorch/attl_manager.py +0 -65
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/WHEEL +0 -0
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/top_level.txt +0 -0
/msprobe/{pytorch/api_accuracy_checker/tensor_transport_layer → core/compare/diff_analyze}/__init__.py +0 -0

msprobe/core/data_dump/data_processor/mindspore_processor.py CHANGED Viewed

@@ -13,7 +13,9 @@
 # limitations under the License.
 # ============================================================================
+import os
 import zlib
+from concurrent.futures import ThreadPoolExecutor
 import mindspore as ms
 from mindspore import mint, ops, hal
@@ -53,6 +55,11 @@ class MindsporeDataProcessor(BaseDataProcessor):
         }
         self._async_dump_cache = {}
         self.api_register = get_api_register()
+        self._crc_executor = ThreadPoolExecutor(max_workers=os.cpu_count() // 2)
+    @staticmethod
+    def compute_crc32_bytes(tensor_bytes):
+        return f"{zlib.crc32(tensor_bytes):08x}"
     @staticmethod
     def get_md5_for_tensor(x):
@@ -65,52 +72,6 @@ class MindsporeDataProcessor(BaseDataProcessor):
     def analyze_dtype_in_kwargs(element):
         return {"type": "mindspore.dtype", "value": str(element)}
-    @staticmethod
-    def get_stat_info_sync(data):
-        tensor_stat = TensorStatInfo()
-        if data.dtype == ms.bool_:
-            data_np = data.asnumpy()
-            tensor_stat.max = np.max(data_np).item()
-            tensor_stat.min = np.min(data_np).item()
-        elif not data.shape:
-            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data
-        elif data.dtype == ms.complex64 or data.dtype == ms.complex128:
-            data_abs = np.abs(data.asnumpy())
-            tensor_stat.max = np.max(data_abs).item()
-            tensor_stat.min = np.min(data_abs).item()
-            tensor_stat.mean = np.mean(data_abs).item()
-            tensor_stat.norm = np.linalg.norm(data_abs).item()
-        else:
-            if not ops.is_floating_point(data) or data.dtype == ms.float64:
-                data = data.to(ms.float32)
-            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
-            tensor_stat.max = mint.max(data)
-            tensor_stat.min = mint.min(data)
-            tensor_stat.mean = mint.mean(data)
-            tensor_stat.norm = get_norm_value(data)
-        return tensor_stat
-    @staticmethod
-    def get_stat_info_async(data):
-        tensor_stat = TensorStatInfo()
-        if data.dtype == ms.bool_:
-            tensor_stat.max = mint.any(data)
-            tensor_stat.min = mint.all(data)
-        elif not data.shape:
-            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data
-        elif data.dtype == ms.complex64 or data.dtype == ms.complex128:
-            logger.warning("Async dump do not support complex data!")
-            return tensor_stat
-        else:
-            if not ops.is_floating_point(data) or data.dtype == ms.float64:
-                data = data.to(ms.float32)
-            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
-            tensor_stat.max = mint.max(data)
-            tensor_stat.min = mint.min(data)
-            tensor_stat.mean = mint.mean(data)
-            tensor_stat.norm = get_norm_value(data)
-        return tensor_stat
     @staticmethod
     def is_hookable_element(element):
         return hasattr(element, "register_hook") and callable(element.register_hook)
@@ -147,14 +108,37 @@ class MindsporeDataProcessor(BaseDataProcessor):
         self.api_register.restore_inner_used_api()
         tensor_stat = TensorStatInfo()
         if data.numel() == 0:
-            stat_info = tensor_stat
-        else:
+            pass
+        elif data.dtype == ms.bool_:
+            if self.config.async_dump:
+                tensor_stat.max = mint.any(data)
+                tensor_stat.min = mint.all(data)
+            else:
+                data_np = data.asnumpy()
+                tensor_stat.max = np.max(data_np).item()
+                tensor_stat.min = np.min(data_np).item()
+        elif not data.shape:
+            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.copy()
+        elif data.dtype == ms.complex64 or data.dtype == ms.complex128:
             if self.config.async_dump:
-                stat_info = MindsporeDataProcessor.get_stat_info_async(data)
+                logger.warning("Async dump do not support complex data!")
             else:
-                stat_info = MindsporeDataProcessor.get_stat_info_sync(data)
+                data_abs = np.abs(data.asnumpy())
+                tensor_stat.max = np.max(data_abs).item()
+                tensor_stat.min = np.min(data_abs).item()
+                tensor_stat.mean = np.mean(data_abs).item()
+                tensor_stat.norm = np.linalg.norm(data_abs).item()
+        else:
+            if self.config.precision == Const.DUMP_PRECISION_HIGH or not ops.is_floating_point(
+                    data) or data.dtype == ms.float64:
+                data = data.to(ms.float32)
+            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
+            tensor_stat.max = mint.max(data)
+            tensor_stat.min = mint.min(data)
+            tensor_stat.mean = mint.mean(data)
+            tensor_stat.norm = get_norm_value(data)
         self.api_register.register_inner_used_api()
-        return stat_info
+        return tensor_stat
     def analyze_single_element(self, element, suffix_stack):
         if suffix_stack and suffix_stack[-1] in self.mindspore_object_key:
@@ -211,8 +195,18 @@ class MindsporeDataProcessor(BaseDataProcessor):
         tensor_json.update({Const.TENSOR_STAT_INDEX: placeholder_index})
         if self.config.summary_mode == Const.MD5 and not self.config.async_dump:
-            tensor_md5 = self.get_md5_for_tensor(tensor)
-            tensor_json.update({Const.MD5: tensor_md5})
+            tensor = convert_bf16_to_fp32(tensor)
+            # 拷贝并搬到 CPU
+            tensor_bytes = tensor.asnumpy()
+            future = self._crc_executor.submit(
+                MindsporeDataProcessor.compute_crc32_bytes,
+                tensor_bytes
+            )
+            crc_placeholder = self.data_writer.append_crc32_to_buffer(future)
+            tensor_json[Const.MD5_INDEX] = crc_placeholder
         return tensor_json
     def _analyze_and_save_tensor(self, tensor, suffix):

msprobe/core/data_dump/data_processor/pytorch_processor.py CHANGED Viewed

@@ -13,7 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import ctypes
+import os
 import zlib
+from collections.abc import Iterable
+from concurrent.futures import ThreadPoolExecutor
 from dataclasses import asdict
 from typing import List
@@ -23,11 +27,10 @@ from torch import distributed as dist
 from torch.distributed.distributed_c10d import _get_default_group
 from msprobe.core.common.const import Const
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.core.common.exceptions import MsprobeException
-from msprobe.core.common.file_utils import path_len_exceeds_limit
 from msprobe.core.common.log import logger
-from msprobe.core.common.utils import convert_tuple
-from msprobe.core.common.decorator import recursion_depth_decorator
+from msprobe.core.common.utils import convert_tuple, is_int
 from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
     ModuleForwardInputsOutputs, TensorStatInfo
 from msprobe.pytorch.common.utils import save_pt
@@ -40,6 +43,84 @@ except ImportError:
     is_gpu = True
+class TensorHandler:
+    def __init__(self):
+        self.has_dtensor = hasattr(dist, "tensor") and hasattr(dist.tensor, "DTensor")
+        self.has_fake_tensor = hasattr(torch, "_subclasses") and hasattr(torch._subclasses, "fake_tensor")
+        self.has_async_collective_tensor = hasattr(dist, "_functional_collectives") and \
+                                           hasattr(dist._functional_collectives, "AsyncCollectiveTensor")
+    @staticmethod
+    def free_tensor(tensor, tensor_name):
+        try:
+            tensor.untyped_storage().resize_(0)
+        except Exception as e:
+            logger.warning(f"Failed to free tensor: {tensor_name}, the detail info: {e}.")
+    def is_dtensor(self, tensor):
+        return self.has_dtensor and isinstance(tensor, dist.tensor.DTensor)
+    def is_fake_tensor(self, tensor):
+        return self.has_fake_tensor and isinstance(tensor, torch._subclasses.fake_tensor.FakeTensor)
+    def is_async_collective_tensor(self, tensor):
+        return self.has_async_collective_tensor and \
+            isinstance(tensor, dist._functional_collectives.AsyncCollectiveTensor)
+    def is_empty_data(self, tensor):
+        return tensor.is_meta or self.is_fake_tensor(tensor) or self.is_async_collective_tensor(tensor)
+    def convert_common_tensor(self, tensor):
+        if self.is_dtensor(tensor):
+            return tensor.to_local()
+        if self.is_fake_tensor(tensor):
+            logger.debug("FakeTensor cannot be converted to torch.Tensor type.")
+            return tensor
+        return tensor
+    def get_tensor_type(self, tensor):
+        if self.is_dtensor(tensor):
+            return Const.DTENSOR_TYPE
+        if self.is_fake_tensor(tensor):
+            return Const.FAKE_TENSOR_TYPE
+        if self.is_async_collective_tensor(tensor):
+            return Const.AC_TENSOR_TYPE
+        return Const.TENSOR_TYPE
+    def get_dtensor_info(self, tensor):
+        dtensor_info = {}
+        if not self.is_dtensor(tensor):
+            return dtensor_info
+        if hasattr(tensor, "device_mesh") and tensor.device_mesh:
+            dtensor_info.update({"device_mesh": tensor.device_mesh.mesh.tolist()})
+        placements = []
+        if hasattr(tensor, "placements") and isinstance(tensor.placements, Iterable):
+            for placement in tensor.placements:
+                if placement.is_shard() and is_int(placement.dim):
+                    placements.append({"Shard": {"dim": placement.dim}})
+                    continue
+                if placement.is_replicate():
+                    placements.append({"Replicate": {}})
+                    continue
+                if placement.is_partial() and isinstance(placement.reduce_op, str):
+                    placements.append({"Partial": {"reduce_op": placement.reduce_op}})
+        dtensor_info.update({"placements": placements})
+        return dtensor_info
+    def save_tensor(self, tensor, file_path):
+        common_tensor = self.convert_common_tensor(tensor)
+        if self.is_empty_data(common_tensor):
+            logger.debug(f"Saving fake tensor or meta tensor is not supported, the current tensor is {file_path}.")
+            return
+        if common_tensor.untyped_storage().data_ptr() == 0:
+            logger.debug(f"Saving null-pointer tensor is not supported, the current tensor is {file_path}.")
+            return
+        saved_tensor = common_tensor.clone().contiguous().detach()
+        save_pt(saved_tensor, file_path)
+        self.free_tensor(saved_tensor, file_path)
 class PytorchDataProcessor(BaseDataProcessor):
     pytorch_special_type = (
         torch.device,
@@ -65,6 +146,8 @@ class PytorchDataProcessor(BaseDataProcessor):
             "dtype": self.analyze_dtype_in_kwargs
         }
         self._async_dump_cache = {}
+        self.tensor_handler = TensorHandler()
+        self._crc_executor = ThreadPoolExecutor(max_workers=os.cpu_count() // 2)
     @staticmethod
     def get_md5_for_tensor(x):
@@ -74,6 +157,64 @@ class PytorchDataProcessor(BaseDataProcessor):
         crc32_hash = zlib.crc32(tensor_bytes)
         return f"{crc32_hash:08x}"
+    @staticmethod
+    def tensor_bytes_view_cpu(t: torch.Tensor):
+        """
+        返回 t 在当前 dtype 下的原始字节视图（优先零拷贝）。
+        需保证：t 已在 CPU 且是 contiguous。
+        可能返回 memoryview 或 bytes（兜底拷贝）或者 转为numpy，均可被 zlib.crc32 接受。
+        """
+        nbytes = t.numel() * t.element_size()
+        byte_offset = t.storage_offset() * t.element_size()
+        if nbytes == 0:
+            return memoryview(b"")
+        storage = t.untyped_storage()
+        # ctypes 指针构造 memoryview（零拷贝 FFI）
+        try:
+            addr = storage.data_ptr() + byte_offset
+            buf = (ctypes.c_ubyte * nbytes).from_address(addr)
+            mv3 = memoryview(buf)
+            return mv3
+        except Exception as e1:
+            logger.warning(f"path_A_failed: {e1}.")
+        try:
+            data = ctypes.string_at(storage.data_ptr() + byte_offset, nbytes)
+            return data  # bytes 也可直接用于 zlib.crc32
+        except Exception as e2:
+            logger.warning(f"path_B_failed: {e2}.")
+        try:
+            if t.dtype == torch.bfloat16:
+                t = t.float()
+            data = t.numpy()
+            return data
+        except Exception as e3:
+            logger.warning(f"path_C_failed: {e3}.")
+            return memoryview(b"")
+    @staticmethod
+    def compute_crc32_from_tensor(t: torch.Tensor) -> str:
+        """
+        直接对 Tensor 原始字节做 CRC32。
+        :
+        - "raw": 保持 bfloat16 原始 16bit 字节（推荐，避免升精/增容）
+        """
+        # 取得字节视图（含多级回退），然后做 CRC
+        mv = PytorchDataProcessor.tensor_bytes_view_cpu(t)
+        crc = zlib.crc32(mv)
+        return f"{crc:08x}"
     @staticmethod
     def analyze_device_in_kwargs(element):
         single_arg = {}
@@ -94,80 +235,6 @@ class PytorchDataProcessor(BaseDataProcessor):
     def analyze_dtype_in_kwargs(element):
         return {"type": "torch.dtype", "value": str(element)}
-    @staticmethod
-    def get_stat_info_async(data):
-        tensor_stat = TensorStatInfo()
-        if torch.is_complex(data):
-            logger.warning("Async dump do not support complex data!")
-            return tensor_stat
-        elif data.dtype == torch.bool:
-            tensor_stat.max = torch.any(data)
-            tensor_stat.min = torch.all(data)
-        elif not data.shape:
-            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data
-        else:
-            if data.dtype == torch.float64 or not data.is_floating_point():
-                data = data.float()
-            tensor_stat.max = torch.max(data)
-            tensor_stat.min = torch.min(data)
-            tensor_stat.mean = torch.mean(data)
-            tensor_stat.norm = torch.norm(data)
-        return tensor_stat
-    @staticmethod
-    def get_stat_info_sync(data):
-        tensor_stat = TensorStatInfo()
-        if torch.is_complex(data):
-            data_np = data.cpu().numpy()
-            data_abs = np.abs(data_np)
-            tensor_stat.max = np.max(data_abs).item()
-            tensor_stat.min = np.min(data_abs).item()
-            tensor_stat.mean = np.mean(data_abs).item()
-        elif data.dtype == torch.bool:
-            tensor_stat.max = torch.any(data)
-            tensor_stat.min = torch.all(data)
-        elif not data.shape:
-            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data
-        else:
-            if data.dtype == torch.float64 or not data.is_floating_point():
-                data = data.float()
-            tensor_stat.max = torch.max(data)
-            tensor_stat.min = torch.min(data)
-            tensor_stat.mean = torch.mean(data)
-            tensor_stat.norm = torch.norm(data)
-        return tensor_stat
-    @staticmethod
-    def get_stat_info(data, async_dump=False):
-        tensor_stat = TensorStatInfo()
-        if data.is_meta:
-            return tensor_stat
-        data_clone = data.detach()
-        if not data_clone.numel() or not data_clone.data_ptr():
-            return tensor_stat
-        else:
-            if data_clone.device.type == Const.CPU_LOWERCASE or not async_dump:
-                return PytorchDataProcessor.get_stat_info_sync(data_clone)
-            else:
-                return PytorchDataProcessor.get_stat_info_async(data_clone)
-    @staticmethod
-    def handle_tensor_extremum_nan_inf(tensor, operator):
-        data_clone = tensor.detach()
-        data_nan = torch.isnan(data_clone)
-        if int(torch.sum(data_nan)) == data_clone.numel():
-            return float('nan')
-        finite_mask = torch.isfinite(data_clone)
-        if int(torch.sum(finite_mask)) > 0:
-            finite_values = data_clone[finite_mask]
-            return torch.max(finite_values).item() if operator == 'max' else \
-                torch.min(finite_values).item()
-        else:
-            data_no_nan = data_clone[~data_nan]
-            return torch.max(data_no_nan).item() if operator == 'max' else \
-                torch.min(data_no_nan).item()
     @staticmethod
     def process_group_hash(arg):
         group_ranks = dist.get_process_group_ranks(arg)
@@ -214,9 +281,40 @@ class PytorchDataProcessor(BaseDataProcessor):
     def get_special_types(cls):
         return super().get_special_types() + cls.pytorch_special_type
+    def get_stat_info(self, data, async_dump=False, precision=Const.DUMP_PRECISION_LOW):
+        tensor_stat = TensorStatInfo()
+        if self.tensor_handler.is_empty_data(data):
+            return tensor_stat
+        data_clone = data.detach()
+        if not data_clone.numel() or not data_clone.data_ptr():
+            return tensor_stat
+        if torch.is_complex(data_clone):
+            if async_dump:
+                logger.warning("Async dump do not support complex data!")
+                return tensor_stat
+            data_np = data_clone.cpu().numpy()
+            data_abs = np.abs(data_np)
+            tensor_stat.max = np.max(data_abs).item()
+            tensor_stat.min = np.min(data_abs).item()
+            tensor_stat.mean = np.mean(data_abs).item()
+        elif data_clone.dtype == torch.bool:
+            tensor_stat.max = torch.any(data_clone)
+            tensor_stat.min = torch.all(data_clone)
+        elif not data_clone.shape:
+            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data_clone.clone()
+        else:
+            if (precision == Const.DUMP_PRECISION_HIGH or data_clone.dtype == torch.float64
+                    or not data_clone.is_floating_point()):
+                data_clone = data_clone.float()
+            tensor_stat.max = torch.max(data_clone)
+            tensor_stat.min = torch.min(data_clone)
+            tensor_stat.mean = torch.mean(data_clone)
+            tensor_stat.norm = torch.norm(data_clone)
+        return tensor_stat
     def dump_async_data(self):
         for file_path, tensor in self._async_dump_cache.items():
-            save_pt(tensor.contiguous(), file_path)
+            self.tensor_handler.save_tensor(tensor, file_path)
         self._async_dump_cache.clear()
     def analyze_single_element(self, element, suffix_stack):
@@ -256,11 +354,12 @@ class PytorchDataProcessor(BaseDataProcessor):
         return p2pop_info
     def _analyze_tensor(self, tensor, suffix):
-        tensor_stat = self.get_stat_info(tensor, self.config.async_dump)
+        common_tensor = self.tensor_handler.convert_common_tensor(tensor)
+        tensor_stat = self.get_stat_info(common_tensor, self.config.async_dump, self.config.precision)
         tensor_json = {}
-        tensor_json.update({'type': 'torch.Tensor'})
-        tensor_json.update({'dtype': str(tensor.dtype)})
-        tensor_json.update({"shape": tensor.shape})
+        tensor_json.update({'type': self.tensor_handler.get_tensor_type(tensor)})
+        tensor_json.update({'dtype': str(common_tensor.dtype)})
+        tensor_json.update({"shape": common_tensor.shape})
         stat_values = [
             tensor_stat.max,
@@ -272,26 +371,64 @@ class PytorchDataProcessor(BaseDataProcessor):
         tensor_json.update({Const.TENSOR_STAT_INDEX: placeholder_index})
         tensor_json.update({"requires_grad": tensor.requires_grad})
+        if self.tensor_handler.is_dtensor(tensor):
+            dtensor_info = self.tensor_handler.get_dtensor_info(tensor)
+            tensor_json.update(dtensor_info)
         if self.config.summary_mode == Const.MD5 and not self.config.async_dump:
-            tensor_md5 = self.get_md5_for_tensor(tensor)
-            tensor_json.update({Const.MD5: tensor_md5})
+            tensor_md5 = None
+            if not self.tensor_handler.is_empty_data(tensor):
+                t_cpu = common_tensor
+                # 根据设备类型做同步，确保数据已准备好
+                if t_cpu.device.type == "cuda":
+                    t_cpu = t_cpu.to("cpu", non_blocking=True)
+                    torch.cuda.synchronize()
+                    # 先异步搬运再进行同步可以显著提升性能
+                elif t_cpu.device.type == "npu":
+                    t_cpu = t_cpu.to("cpu", non_blocking=True)
+                    torch.npu.synchronize()
+                t_cpu = t_cpu.detach()
+                if not t_cpu.is_contiguous():
+                    t_cpu = t_cpu.contiguous()
+                future = self._crc_executor.submit(
+                    PytorchDataProcessor.compute_crc32_from_tensor,
+                    t_cpu
+                )
+                crc_placeholder = self.data_writer.append_crc32_to_buffer(future)
+                tensor_json[Const.MD5_INDEX] = crc_placeholder
+            else:
+                logger.debug(
+                    "Calculating the md5 value of fake tensor or meta tensor is not supported, "
+                    f"the current api/module name is {self.current_api_or_module_name}."
+                )
+                tensor_json.update({Const.MD5: tensor_md5})
         return tensor_json
     def _analyze_and_save_tensor(self, tensor, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
         single_arg = PytorchDataProcessor._analyze_tensor(self, tensor, suffix)
+        common_tensor = self.tensor_handler.convert_common_tensor(tensor)
+        if self.tensor_handler.is_empty_data(common_tensor):
+            logger.debug(f"Saving fake tensor or meta tensor is not supported, the current tensor is {file_path}.")
+            return single_arg
+        if common_tensor.untyped_storage().data_ptr() == 0:
+            logger.debug(f"Saving null-pointer tensor is not supported, the current tensor is {file_path}.")
+            return single_arg
         single_arg.update({"data_name": dump_data_name})
         if self.config.async_dump:
-            self._async_dump_cache[file_path] = tensor.clone().detach()
+            self._async_dump_cache[file_path] = common_tensor.clone().detach()
         else:
-            saved_tensor = tensor.clone().contiguous().detach()
-            save_pt(saved_tensor, file_path)
+            self.tensor_handler.save_tensor(common_tensor, file_path)
         return single_arg
     def _analyze_and_save_ndarray(self, ndarray, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
-        save_pt(torch.tensor(ndarray), file_path)
+        self.tensor_handler.save_tensor(torch.tensor(ndarray), file_path)
         ndarray_json = PytorchDataProcessor._analyze_ndarray(ndarray, suffix)
         ndarray_json.update({"data_name": dump_data_name})
         return ndarray_json
@@ -382,7 +519,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
             self._analyze_maybe_overflow_flag()
         if self.has_overflow:
             for file_path, tensor in self.cached_tensors_and_file_paths.items():
-                save_pt(tensor.clone().contiguous().detach(), file_path)
+                self.tensor_handler.save_tensor(tensor, file_path)
             self.real_overflow_nums += 1
             if self.overflow_nums != -1 and self.real_overflow_nums >= self.overflow_nums:
                 logger.info(f"[{Const.TOOL_NAME}] Reached the preset overflow times, "
@@ -427,10 +564,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
     def _analyze_tensor(self, tensor, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
-        if not path_len_exceeds_limit(file_path):
-            self.cached_tensors_and_file_paths.update({file_path: tensor})
-        else:
-            logger.warning(f'The file path {file_path} length exceeds limit.')
+        self.cached_tensors_and_file_paths.update({file_path: tensor})
         single_arg = super()._analyze_tensor(tensor, suffix)
         single_arg.update({"data_name": dump_data_name})
         if not self.has_overflow and self.support_inf_nan:

mindstudio-probe 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl

mindstudio-probe 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl