mindstudio-probe 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/METADATA +2 -2
  2. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/RECORD +172 -147
  3. msprobe/README.md +6 -6
  4. msprobe/core/common/const.py +98 -41
  5. msprobe/core/common/db_manager.py +256 -0
  6. msprobe/core/common/file_utils.py +28 -5
  7. msprobe/core/common/log.py +7 -0
  8. msprobe/core/common/megatron_utils.py +59 -0
  9. msprobe/core/common/parallel_state.py +193 -0
  10. msprobe/core/common/utils.py +20 -13
  11. msprobe/core/common_config.py +5 -0
  12. msprobe/core/compare/acc_compare.py +140 -93
  13. msprobe/core/compare/check.py +13 -0
  14. msprobe/core/compare/compare_cli.py +64 -6
  15. msprobe/core/compare/config.py +10 -8
  16. msprobe/core/compare/diff_analyze/diff_analyze_threshold.yaml +14 -0
  17. msprobe/core/compare/diff_analyze/first_diff_analyze.py +135 -0
  18. msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
  19. msprobe/core/compare/find_first/__init__.py +0 -0
  20. msprobe/core/compare/find_first/analyzer.py +282 -0
  21. msprobe/core/compare/find_first/data_processor.py +35 -0
  22. msprobe/core/compare/find_first/graph.py +188 -0
  23. msprobe/core/compare/find_first/utils.py +189 -0
  24. msprobe/core/compare/highlight.py +74 -101
  25. msprobe/core/compare/layer_mapping/layer_mapping.py +14 -9
  26. msprobe/core/compare/merge_result/merge_result.py +2 -2
  27. msprobe/core/compare/multiprocessing_compute.py +45 -28
  28. msprobe/core/compare/npy_compare.py +7 -10
  29. msprobe/core/compare/utils.py +338 -130
  30. msprobe/core/config_check/checkers/dataset_checker.py +2 -1
  31. msprobe/core/config_check/checkers/env_args_checker.py +5 -5
  32. msprobe/core/config_check/checkers/hyperparameter_checker.py +30 -10
  33. msprobe/core/config_check/checkers/pip_checker.py +4 -3
  34. msprobe/core/config_check/checkers/random_checker.py +3 -3
  35. msprobe/core/config_check/checkers/weights_checker.py +2 -1
  36. msprobe/core/config_check/ckpt_compare/megatron_loader.py +2 -0
  37. msprobe/core/config_check/resource/hyperparameter.yaml +11 -1
  38. msprobe/core/config_check/utils/hyperparameter_parser.py +7 -3
  39. msprobe/core/config_check/utils/utils.py +10 -0
  40. msprobe/core/data_dump/api_registry.py +49 -30
  41. msprobe/core/data_dump/data_collector.py +71 -29
  42. msprobe/core/data_dump/data_processor/base.py +2 -0
  43. msprobe/core/data_dump/data_processor/mindspore_processor.py +47 -53
  44. msprobe/core/data_dump/data_processor/pytorch_processor.py +227 -93
  45. msprobe/core/data_dump/json_writer.py +81 -7
  46. msprobe/core/data_dump/scope.py +4 -6
  47. msprobe/core/hook_manager.py +129 -70
  48. msprobe/core/monitor/csv2db.py +361 -0
  49. msprobe/core/monitor/db_utils.py +278 -0
  50. msprobe/core/monitor/utils.py +35 -1
  51. msprobe/core/service.py +31 -39
  52. msprobe/core/single_save/single_comparator.py +16 -3
  53. msprobe/docs/01.installation.md +51 -19
  54. msprobe/docs/02.config_introduction.md +16 -20
  55. msprobe/docs/03.config_examples.md +26 -0
  56. msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
  57. msprobe/docs/05.data_dump_PyTorch.md +6 -2
  58. msprobe/docs/06.data_dump_MindSpore.md +44 -7
  59. msprobe/docs/07.accuracy_checker_PyTorch.md +1 -1
  60. msprobe/docs/10.accuracy_compare_PyTorch.md +124 -44
  61. msprobe/docs/11.accuracy_compare_MindSpore.md +75 -7
  62. msprobe/docs/14.data_parse_PyTorch.md +1 -1
  63. msprobe/docs/19.monitor.md +94 -7
  64. msprobe/docs/21.visualization_PyTorch.md +71 -101
  65. msprobe/docs/22.visualization_MindSpore.md +69 -119
  66. msprobe/docs/23.generate_operator_PyTorch.md +1 -1
  67. msprobe/docs/25.tool_function_introduction.md +0 -1
  68. msprobe/docs/26.data_dump_PyTorch_baseline.md +7 -7
  69. msprobe/docs/28.debugger_save_instruction.md +184 -81
  70. msprobe/docs/29.data_dump_MSAdapter.md +6 -0
  71. msprobe/docs/31.config_check.md +4 -2
  72. msprobe/docs/36.calculation_result_change.md +75 -0
  73. msprobe/docs/FAQ.md +22 -1
  74. msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +6 -2
  75. msprobe/docs/img/compare_result.png +0 -0
  76. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  77. msprobe/docs/img/visualization/vis_match_info.png +0 -0
  78. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  79. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  80. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  81. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  82. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  83. msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
  84. msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
  85. msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
  86. msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
  87. msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
  88. msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
  89. msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
  90. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +59 -0
  91. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
  92. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
  93. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +80 -0
  94. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
  95. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
  96. msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +330 -0
  97. msprobe/mindspore/__init__.py +1 -1
  98. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +1 -1
  99. msprobe/mindspore/api_accuracy_checker/api_runner.py +9 -6
  100. msprobe/mindspore/api_accuracy_checker/compute_element.py +18 -12
  101. msprobe/mindspore/cell_processor.py +64 -25
  102. msprobe/mindspore/common/utils.py +51 -7
  103. msprobe/mindspore/compare/common_dir_compare.py +45 -37
  104. msprobe/mindspore/compare/ms_compare.py +10 -2
  105. msprobe/mindspore/compare/ms_graph_compare.py +47 -52
  106. msprobe/mindspore/debugger/debugger_config.py +18 -7
  107. msprobe/mindspore/debugger/precision_debugger.py +16 -12
  108. msprobe/mindspore/dump/cell_dump_process.py +130 -68
  109. msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +10 -2
  110. msprobe/mindspore/dump/graph_mode_cell_dump.py +35 -9
  111. msprobe/mindspore/dump/graph_tensor_dump.py +11 -0
  112. msprobe/mindspore/dump/hook_cell/api_register.py +19 -20
  113. msprobe/mindspore/dump/hook_cell/hook_cell.py +12 -34
  114. msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +142 -21
  115. msprobe/mindspore/dump/kernel_kbyk_dump.py +24 -0
  116. msprobe/mindspore/exception_dump/__init__.py +0 -0
  117. msprobe/mindspore/exception_dump/exception_dump_tool_factory.py +51 -0
  118. msprobe/mindspore/exception_dump/kernel_graph_exception_dump.py +57 -0
  119. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +5 -4
  120. msprobe/mindspore/mindspore_service.py +2 -2
  121. msprobe/mindspore/mindtorch/mindtorch_adaptor.py +12 -7
  122. msprobe/mindspore/monitor/features.py +82 -0
  123. msprobe/mindspore/monitor/module_hook.py +168 -10
  124. msprobe/mindspore/monitor/utils.py +27 -1
  125. msprobe/mindspore/ms_config.py +12 -4
  126. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +1 -1
  127. msprobe/mindspore/task_handler_factory.py +3 -1
  128. msprobe/nan_analyze/graph.py +1 -1
  129. msprobe/pytorch/api_accuracy_checker/common/config.py +3 -36
  130. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +0 -24
  131. msprobe/pytorch/api_accuracy_checker/compare/compare.py +2 -12
  132. msprobe/pytorch/api_accuracy_checker/config.yaml +1 -6
  133. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -2
  134. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +12 -132
  135. msprobe/pytorch/common/utils.py +1 -21
  136. msprobe/pytorch/compare/pt_compare.py +10 -2
  137. msprobe/pytorch/{hook_module/jit_script_wrapper.py → compare/pt_diff_analyze.py} +3 -15
  138. msprobe/pytorch/compare/utils.py +2 -1
  139. msprobe/pytorch/debugger/debugger_config.py +18 -23
  140. msprobe/pytorch/dump/module_dump/hook_wrapper.py +10 -7
  141. msprobe/pytorch/dump/module_dump/module_processer.py +41 -19
  142. msprobe/pytorch/free_benchmark/main.py +7 -4
  143. msprobe/pytorch/hook_module/api_register.py +62 -24
  144. msprobe/pytorch/hook_module/hook_module.py +9 -29
  145. msprobe/pytorch/hook_module/pt_hook_manager.py +84 -15
  146. msprobe/pytorch/hook_module/script_wrapper.py +140 -0
  147. msprobe/pytorch/hook_module/support_wrap_ops.yaml +6 -0
  148. msprobe/pytorch/monitor/csv2tb.py +1 -1
  149. msprobe/pytorch/monitor/features.py +94 -0
  150. msprobe/pytorch/monitor/module_hook.py +221 -81
  151. msprobe/pytorch/monitor/module_metric.py +27 -1
  152. msprobe/pytorch/monitor/optimizer_collect.py +109 -4
  153. msprobe/pytorch/online_dispatch/dispatch.py +42 -24
  154. msprobe/pytorch/online_dispatch/dump_compare.py +1 -1
  155. msprobe/pytorch/parse_tool/lib/visualization.py +0 -1
  156. msprobe/pytorch/pt_config.py +2 -51
  157. msprobe/pytorch/pytorch_service.py +7 -14
  158. msprobe/visualization/builder/graph_builder.py +192 -63
  159. msprobe/visualization/builder/graph_merger.py +986 -0
  160. msprobe/visualization/builder/msprobe_adapter.py +17 -15
  161. msprobe/visualization/compare/graph_comparator.py +26 -16
  162. msprobe/visualization/db_utils.py +252 -0
  163. msprobe/visualization/graph/base_node.py +2 -22
  164. msprobe/visualization/graph/distributed_analyzer.py +12 -12
  165. msprobe/visualization/graph/graph.py +44 -16
  166. msprobe/visualization/graph_service.py +143 -59
  167. msprobe/visualization/utils.py +103 -4
  168. msprobe/docs/08.accuracy_checker_online_PyTorch.md +0 -295
  169. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +0 -205
  170. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +0 -378
  171. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +0 -239
  172. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +0 -115
  173. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +0 -250
  174. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +0 -63
  175. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +0 -198
  176. msprobe/pytorch/attl_manager.py +0 -65
  177. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/LICENSE +0 -0
  178. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/WHEEL +0 -0
  179. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/entry_points.txt +0 -0
  180. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/top_level.txt +0 -0
  181. /msprobe/{pytorch/api_accuracy_checker/tensor_transport_layer → core/compare/diff_analyze}/__init__.py +0 -0
msprobe/core/monitor/csv2db.py
@@ -0,0 +1,361 @@
+ # Copyright (c) 2025-2026, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import datetime
+ import os
+ import re
+ from collections import OrderedDict, defaultdict
+ from concurrent.futures import ProcessPoolExecutor, as_completed
+ from dataclasses import dataclass
+ from typing import Dict, List, Optional, Tuple
+
+ import pytz
+ from msprobe.core.common.const import MonitorConst
+ from msprobe.core.common.file_utils import (create_directory, read_csv,
+                                             recursive_chmod, remove_path)
+ from msprobe.core.common.log import logger
+ from msprobe.core.common.utils import is_int
+ from msprobe.core.monitor.db_utils import MonitorDB, update_ordered_dict
+ from msprobe.core.monitor.utils import get_target_output_dir
+ from tqdm import tqdm
+
+ # Constants
+ all_data_type_list = [
+     "actv", "actv_grad", "exp_avg", "exp_avg_sq",
+     "grad_unreduced", "grad_reduced", "param_origin", "param_updated", "other"
+ ]
+
+
+ @dataclass
+ class CSV2DBConfig:
+     """Configuration for CSV to database conversion"""
+     monitor_path: str
+     time_start: Optional[str] = None
+     time_end: Optional[str] = None
+     process_num: int = 1
+     data_type_list: Optional[List[str]] = None
+     output_dirpath: Optional[str] = None
+     step_partition: int = 500
+
+
+ def validate_process_num(process_num: int) -> None:
+     """Validate process number parameter"""
+     if not is_int(process_num) or process_num <= 0:
+         raise ValueError("process_num must be a positive integer")
+     if process_num > MonitorConst.MAX_PROCESS_NUM:
+         raise ValueError(f"Maximum supported process_num is {MonitorConst.MAX_PROCESS_NUM}")
+
+
+ def validate_step_partition(step_partition: int) -> None:
+     if not isinstance(step_partition, int):
+         raise TypeError("step_partition must be an integer")
+     if not MonitorConst.MIN_PARTITION <= step_partition <= MonitorConst.MAX_PARTITION:
+         raise ValueError(
+             f"step_partition must be between {MonitorConst.MIN_PARTITION} "
+             f"and {MonitorConst.MAX_PARTITION}, got {step_partition}"
+         )
+
+
+ def validate_data_type_list(data_type_list: Optional[List[str]]) -> None:
+     """Validate data type list parameter"""
+     if not data_type_list:
+         logger.info(f"Using default data types: {all_data_type_list}")
+         return
+
+     if not isinstance(data_type_list, list):
+         raise ValueError("data_type_list must be a list")
+
+     invalid_types = [t for t in data_type_list if t not in all_data_type_list]
+     if invalid_types:
+         raise ValueError(f"Unsupported data types: {invalid_types}")
+
+
+ def get_info_from_filename(file_name, metric_list=None):
+     metric_name = "_".join(file_name.split('_')[:-1])
+     if metric_list and metric_name not in metric_list:
+         return "", 0, 0
+     match = re.match(f"{metric_name}{MonitorConst.CSV_FILE_PATTERN}", file_name)
+     if not match:
+         return "", 0, 0
+     step_start, step_end = match.groups()
+     return metric_name, step_start, step_end
+
+
+ def _pre_scan_single_rank(rank: int, files: List[str]) -> Dict:
+     """Pre-scan files for a single rank to collect metadata"""
+     metrics = set()
+     min_step = None
+     max_step = 0
+     metric_stats = defaultdict(set)
+     targets = OrderedDict()
+
+     for file_path in files:
+         file_name = os.path.basename(file_path)
+         metric_name, step_start, step_end = get_info_from_filename(file_name)
+         if not metric_name:
+             continue
+         step_start, step_end = int(step_start), int(step_end)
+
+         metrics.add(metric_name)
+         min_step = min(
+             step_start if min_step is None else min_step, step_start)
+         max_step = max(max_step, step_end)
+
+         data = read_csv(file_path)
+         stats = [k for k in data.keys() if k in MonitorConst.OP_MONVIS_SUPPORTED]
+         metric_stats[metric_name].update(stats)
+
+         for row_id, row in data.iterrows():
+             try:
+                 name = row[MonitorConst.HEADER_NAME]
+                 vpp_stage = int(row['vpp_stage'])
+                 micro_step = int(row.get('micro_step', MonitorConst.DEFAULT_INT_VALUE))
+             except (ValueError, KeyError) as e:
+                 logger.warning(
+                     f"CSV conversion failed | file={file_path}:{row_id + 2} | error={str(e)}")
+                 continue
+             target = (name, vpp_stage, micro_step)
+             if target not in targets:
+                 targets[target] = None
+
+     return {
+         'max_rank': int(rank),
+         'metrics': metrics,
+         'min_step': min_step,
+         'max_step': max_step,
+         'metric_stats': metric_stats,
+         'targets': list(targets.keys())
+     }
+
+
+ def _pre_scan(monitor_db: MonitorDB, data_dirs: Dict[int, str], data_type_list: List[str], workers: int = 1):
+     """Pre-scan all targets, metrics, and statistics"""
+     logger.info("Scanning dimensions...")
+     rank_files = defaultdict(list)
+
+     # Collect files for each rank
+     for rank, dir_path in data_dirs.items():
+         files = os.listdir(dir_path)
+         for file in files:
+             metric_name, _, _ = get_info_from_filename(
+                 file, metric_list=data_type_list)
+             if not metric_name:
+                 continue
+             rank_files[rank].append(os.path.join(dir_path, file))
+
+     # Parallel pre-scan
+     with ProcessPoolExecutor(max_workers=workers) as executor:
+         futures = {
+             executor.submit(_pre_scan_single_rank, rank, files): rank
+             for rank, files in rank_files.items()
+         }
+
+         results = []
+         with tqdm(total=len(futures), desc="Pre-scanning ranks") as pbar:
+             for future in as_completed(futures):
+                 rank = futures[future]
+                 try:
+                     result = future.result()
+                     results.append(result)
+                 except Exception as e:
+                     logger.error(
+                         f"Error pre-scanning rank {rank}: {str(e)}")
+                 pbar.update(1)
+
+     # Aggregate results
+     targets = OrderedDict()
+     metrics = set()
+     min_step = None
+     max_step = 0
+     max_rank = 0
+     metric_stats = defaultdict(set)
+
+     for rank_result in results:
+         max_rank = max(max_rank, rank_result['max_rank'])
+         metrics.update(rank_result['metrics'])
+         min_step = min(
+             min_step if min_step is not None else rank_result['min_step'],
+             rank_result['min_step']
+         )
+         max_step = max(max_step, rank_result['max_step'])
+
+         for metric, stats in rank_result['metric_stats'].items():
+             metric_stats[metric].update(stats)
+
+         targets = update_ordered_dict(targets, rank_result['targets'])
+
+     monitor_db.insert_dimensions(
+         targets, metrics, metric_stats, min_step=min_step, max_step=max_step)
+     monitor_db.update_global_stats(
+         max_rank=max_rank, min_step=min_step, max_step=max_step)
+     return rank_files
+
+
+ def process_single_rank(
+         task: Tuple[int, List[str]],
+         metric_id_dict: Dict[str, Tuple[int, List[str]]],
+         target_dict: Dict[Tuple[str, int, int], int],
+         step_partition_size: int,
+         db_path: str
+ ) -> int:
+     """Process data import for a single rank"""
+     rank, files = task
+     db = MonitorDB(db_path, step_partition_size=step_partition_size)
+     total_inserted = 0
+     table_batches = defaultdict(list)
+
+     for file in files:
+         filename = os.path.basename(file)
+         metric_name, _, _ = get_info_from_filename(filename)
+         if not metric_name:
+             continue
+         metric_info = metric_id_dict.get(metric_name)
+         if not metric_info:
+             continue
+
+         metric_id, stats = metric_info
+
+         for row_id, row in read_csv(file).iterrows():
+             try:
+                 # Parse row data
+                 name = row.get(MonitorConst.HEADER_NAME)
+                 vpp_stage = int(row['vpp_stage'])
+                 micro_step = int(row.get('micro_step', MonitorConst.DEFAULT_INT_VALUE))
+                 target_id = target_dict.get((name, vpp_stage, micro_step))
+                 if not target_id:
+                     continue
+
+                 step = int(row['step'])
+                 table_name, _, _ = db.get_metric_table_name(metric_id, step)
+                 # Prepare row data
+                 row_data = [rank, step, target_id]
+                 row_data.extend(
+                     float(row[stat]) if stat in row else None
+                     for stat in stats
+                 )
+             except (ValueError, KeyError) as e:
+                 logger.error(
+                     f"CSV conversion failed | file={file}:{row_id + 2} | error={str(e)}")
+                 continue
+
+             table_batches[table_name].append(tuple(row_data))
+             # Batch insert when threshold reached
+             if len(table_batches[table_name]) >= MonitorConst.BATCH_SIZE:
+                 inserted = db.insert_rows(
+                     table_name, table_batches[table_name])
+                 if inserted is not None:
+                     total_inserted += inserted
+                 table_batches[table_name] = []
+
+     # Insert remaining data
+     for table_name, batch in table_batches.items():
+         if batch:
+             inserted = db.insert_rows(table_name, batch)
+             if inserted is not None:
+                 total_inserted += inserted
+
+     logger.info(f"Rank {rank} inserted {total_inserted} rows")
+     return total_inserted
+
+
+ def import_data(monitor_db: MonitorDB, data_dirs: Dict[int, str], data_type_list: List[str], workers: int = 4) -> bool:
+     """Main method to import data into database"""
+     # 1. Pre-scan to get rank tasks
+     monitor_db.init_schema()
+     rank_tasks = _pre_scan(monitor_db, data_dirs, data_type_list, workers)
+     if not rank_tasks:
+         logger.error("No valid data files found during pre-scan")
+         return False
+
+     # 2. Get metric and target mappings
+     try:
+         metric_id_dict = monitor_db.get_metric_mapping()
+         target_dict = monitor_db.get_target_mapping()
+     except Exception as e:
+         logger.error(f"Failed to get database mappings: {str(e)}")
+         return False
+
+     # 3. Process data for each rank in parallel
+     total_files = sum(len(files) for files in rank_tasks.values())
+     logger.info(f"Starting data import for {len(rank_tasks)} ranks, "
+                 f"{total_files} files...")
+     all_succeeded = True
+     with ProcessPoolExecutor(max_workers=workers) as executor:
+         futures = {
+             executor.submit(
+                 process_single_rank,
+                 (rank, files),
+                 metric_id_dict,
+                 target_dict,
+                 monitor_db.step_partition_size,
+                 monitor_db.db_path): rank
+             for rank, files in rank_tasks.items()
+         }
+
+         with tqdm(as_completed(futures), total=len(futures), desc="Import progress") as pbar:
+             for future in pbar:
+                 rank = futures[future]
+                 try:
+                     inserted = future.result()
+                     pbar.set_postfix_str(
+                         f"Rank {rank}: inserted {inserted} rows")
+                 except Exception as e:
+                     logger.error(
+                         f"Failed to process rank {rank}: {str(e)}")
+                     all_succeeded = False
+     return all_succeeded
+
+
+ def csv2db(config: CSV2DBConfig) -> None:
+     """Main function to convert CSV files to database"""
+     validate_process_num(config.process_num)
+     validate_step_partition(config.step_partition)
+     validate_data_type_list(config.data_type_list)
+
+     target_output_dirs = get_target_output_dir(
+         config.monitor_path, config.time_start, config.time_end)
+
+     if config.output_dirpath is None:
+         local_tz = pytz.timezone("Asia/Shanghai")
+         cur_time = datetime.datetime.now(local_tz).strftime("%b%d_%H-%M-%S")
+         config.output_dirpath = os.path.join(
+             config.monitor_path, f"{cur_time}-csv2db")
+
+     create_directory(config.output_dirpath)
+     db_path = os.path.join(config.output_dirpath, "monitor_metrics.db")
+
+     if os.path.exists(db_path):
+         logger.warning(f"Existing database {db_path} will be removed")
+         remove_path(db_path)
+
+     db = MonitorDB(db_path, step_partition_size=config.step_partition)
+
+     result = import_data(
+         db,
+         target_output_dirs,
+         config.data_type_list if config.data_type_list else all_data_type_list,
+         workers=config.process_num
+     )
+     recursive_chmod(config.output_dirpath)
+     if result:
+         logger.info(
+             f"Data import completed. Output saved to: {config.output_dirpath}")
+     else:
+         logger.warning(
+             f"Data import may be incomplete. Output directory: {config.output_dirpath} "
+             f"(some records might have failed)"
+         )
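
Taken together, the new csv2db.py runs in two phases: a parallel pre-scan that registers every target, metric, and step range in the dimension tables, then a parallel per-rank import into partitioned metric tables. A minimal sketch of driving it programmatically follows; only CSV2DBConfig and csv2db come from the diff above, the argument values are illustrative, and the accepted time-window format is whatever get_target_output_dir parses from the monitor output directory names:

# Hypothetical driver for the new converter; paths and values are made up.
from msprobe.core.monitor.csv2db import CSV2DBConfig, csv2db

config = CSV2DBConfig(
    monitor_path="./monitor_output",        # tree of per-rank CSV dumps
    process_num=4,                          # ranks scanned/imported in parallel
    data_type_list=["actv", "actv_grad"],   # subset of all_data_type_list
    step_partition=500,                     # steps per metric table partition
)
csv2db(config)
# With output_dirpath unset, results land in
# <monitor_path>/<timestamp>-csv2db/monitor_metrics.db
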
msprobe/core/monitor/db_utils.py
@@ -0,0 +1,278 @@
+ # Copyright (c) 2025, Huawei Technologies Co., Ltd.
+ # All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from collections import OrderedDict
+ from collections.abc import Iterable
+ from typing import Dict, List, Optional, Set, Tuple
+
+ from msprobe.core.common.const import MonitorConst
+ from msprobe.core.common.db_manager import DBManager
+
+
+ def update_ordered_dict(main_dict: OrderedDict, new_list: List) -> OrderedDict:
+     """Update ordered dictionary with new items"""
+     for item in new_list:
+         if item not in main_dict:
+             main_dict[item] = None
+     return main_dict
+
+
+ def get_ordered_stats(stats: Iterable) -> List[str]:
+     """Get statistics in predefined order"""
+     if not isinstance(stats, Iterable):
+         return []
+     return [stat for stat in MonitorConst.OP_MONVIS_SUPPORTED if stat in stats]
+
+
+ class MonitorSql:
+     """SQL definitions for the monitoring database tables"""
+
+     @staticmethod
+     def create_monitoring_targets_table():
+         """Monitoring targets table"""
+         return """
+         CREATE TABLE IF NOT EXISTS monitoring_targets (
+             target_id INTEGER PRIMARY KEY AUTOINCREMENT,
+             target_name TEXT NOT NULL,
+             vpp_stage INTEGER NOT NULL,
+             micro_step INTEGER NOT NULL DEFAULT 0,
+             UNIQUE(target_name, vpp_stage, micro_step)
+         )"""
+
+     @staticmethod
+     def create_monitoring_metrics_table():
+         """Monitoring metrics table"""
+         return """
+         CREATE TABLE IF NOT EXISTS monitoring_metrics (
+             metric_id INTEGER PRIMARY KEY AUTOINCREMENT,
+             metric_name TEXT UNIQUE NOT NULL
+         )"""
+
+     @staticmethod
+     def get_metric_mapping_sql():
+         return """
+         SELECT m.metric_id, m.metric_name, GROUP_CONCAT(ms.stat_name) as stats
+         FROM monitoring_metrics m
+         LEFT JOIN metric_stats ms ON m.metric_id = ms.metric_id
+         GROUP BY m.metric_id
+         """
+
+     @staticmethod
+     def create_metric_stats_table():
+         """Metric statistics table"""
+         return """
+         CREATE TABLE IF NOT EXISTS metric_stats (
+             metric_id INTEGER NOT NULL,
+             stat_name TEXT NOT NULL,
+             PRIMARY KEY (metric_id, stat_name),
+             FOREIGN KEY (metric_id) REFERENCES monitoring_metrics(metric_id)
+         ) WITHOUT ROWID"""
+
+     @staticmethod
+     def create_global_stat_table():
+         return """
+         CREATE TABLE IF NOT EXISTS global_stats (
+             stat_name TEXT PRIMARY KEY,
+             stat_value INTEGER NOT NULL
+         ) WITHOUT ROWID"""
+
+     @classmethod
+     def get_table_definition(cls, table_name=""):
+         """
+         Get the CREATE TABLE SQL for a table.
+         :param table_name: table name; if empty, return the SQL for all tables
+         :return: CREATE TABLE statement(s)
+         :raises ValueError: if the table name is not supported
+         """
+         table_creators = {
+             "monitoring_targets": cls.create_monitoring_targets_table,
+             "monitoring_metrics": cls.create_monitoring_metrics_table,
+             "metric_stats": cls.create_metric_stats_table,
+             "global_stats": cls.create_global_stat_table,
+         }
+         if not table_name:
+             return [creator() for creator in table_creators.values()]
+         if table_name not in table_creators:
+             raise ValueError(f"Unsupported table name: {table_name}")
+         return table_creators[table_name]()
+
+     @classmethod
+     def get_metric_table_definition(cls, table_name, stats, partition=None):
+         stat_columns = [f"{stat} REAL DEFAULT NULL" for stat in stats]
+         if partition and len(partition) == 2:
+             partition_start_step, partition_end_step = partition
+             step_column = (f"step INTEGER NOT NULL CHECK(step BETWEEN {partition_start_step} "
+                            f"AND {partition_end_step}),")
+         else:
+             step_column = "step INTEGER NOT NULL,"
+         create_sql = f"""
+         CREATE TABLE {table_name} (
+             rank INTEGER NOT NULL,
+             {step_column}
+             target_id INTEGER NOT NULL,
+             {', '.join(stat_columns)},
+             PRIMARY KEY (rank, step, target_id),
+             FOREIGN KEY (target_id) REFERENCES monitoring_targets(target_id)
+         ) WITHOUT ROWID
+         """
+         return create_sql
+
+
+ class MonitorDB:
+     """Main class for monitoring database operations"""
+
+     def __init__(self, db_path: str, step_partition_size: int = 500):
+         self.db_path = db_path
+         self.db_manager = DBManager(db_path)
+         self.step_partition_size = step_partition_size
+
+     def get_metric_table_name(self, metric_id: int, step: int) -> Tuple[str, int, int]:
+         """Generate metric table name and its partition bounds"""
+         step_start = (
+             step // self.step_partition_size) * self.step_partition_size
+         step_end = step_start + self.step_partition_size - 1
+         return f"metric_{metric_id}_step_{step_start}_{step_end}", step_start, step_end
+
+     def init_schema(self) -> None:
+         """Initialize database schema"""
+         self.db_manager.execute_multi_sql(MonitorSql.get_table_definition())
+
+         # Insert initial global stats
+         global_stats = [
+             ('max_rank', 0),
+             ('min_step', 0),
+             ('max_step', 0),
+             ('step_partition_size', self.step_partition_size)
+         ]
+         self.db_manager.insert_data("global_stats", global_stats)
+
+     def insert_dimensions(
+             self,
+             targets: OrderedDict,
+             metrics: Set[str],
+             metric_stats: Dict[str, Set[str]],
+             min_step: Optional[int] = None,
+             max_step: Optional[int] = None,
+     ) -> None:
+         """Insert dimension data into database"""
+         # Insert targets
+         self.db_manager.insert_data(
+             "monitoring_targets",
+             [(name, vpp_stage, micro_step)
+              for (name, vpp_stage, micro_step) in targets],
+             key_list=["target_name", "vpp_stage", "micro_step"]
+         )
+
+         # Insert metrics
+         self.db_manager.insert_data(
+             "monitoring_metrics",
+             [(metric,) for metric in metrics],
+             key_list=["metric_name"]
+         )
+
+         # Insert metric-stat relationships
+         for metric, stats in metric_stats.items():
+             metric_id = self._get_metric_id(metric)
+             ordered_stats = get_ordered_stats(stats)
+
+             self.db_manager.insert_data(
+                 "metric_stats",
+                 [(metric_id, stat) for stat in ordered_stats],
+                 key_list=["metric_id", "stat_name"]
+             )
+
+             # Create this metric's table for each step partition
+             if min_step is not None and max_step is not None:
+                 first_partition = min_step // self.step_partition_size
+                 last_partition = max_step // self.step_partition_size
+
+                 for partition in range(first_partition, last_partition + 1):
+                     step_start = partition * self.step_partition_size
+                     self.create_metric_table(
+                         metric_id, step_start, ordered_stats)
+
+     def insert_rows(self, table_name, rows):
+         if not self.db_manager.table_exists(table_name):
+             raise RuntimeError(f"{table_name} does not exist in {self.db_path}")
+         inserted = self.db_manager.insert_data(table_name, rows)
+         inserted = 0 if inserted is None else inserted
+         return inserted
+
+     def create_metric_table(self, metric_id: int, step: int, stats: List[str]) -> str:
+         """Create metric table for a specific partition"""
+         table_name, partition_start_step, partition_end_step = self.get_metric_table_name(
+             metric_id,
+             step
+         )
+         if self.db_manager.table_exists(table_name):
+             return table_name
+
+         create_sql = MonitorSql.get_metric_table_definition(
+             table_name, stats, partition=(
+                 partition_start_step, partition_end_step)
+         )
+         self.db_manager.execute_sql(create_sql)
+         return table_name
+
+     def update_global_stats(self, max_rank: int = None, min_step: Optional[int] = None,
+                             max_step: int = None) -> None:
+         """Update global statistics"""
+         updates = [
+             ("max_rank", max_rank),
+             ("min_step", min_step),
+             ("max_step", max_step)
+         ]
+         for stat_name, value in updates:
+             if value is None:
+                 continue
+             self.db_manager.update_data(
+                 table_name="global_stats",
+                 updates={"stat_value": value},
+                 where={"stat_name": stat_name}
+             )
+
+     def get_metric_mapping(self) -> Dict[str, Tuple[int, List[str]]]:
+         """Get metric name to ID mapping with statistics"""
+         results = self.db_manager.execute_sql(
+             MonitorSql.get_metric_mapping_sql()
+         )
+
+         return {
+             row["metric_name"]: (
+                 row["metric_id"],
+                 get_ordered_stats(row["stats"].split(",")) if row["stats"] else []
+             ) for row in results
+         }
+
+     def get_target_mapping(self) -> Dict[Tuple[str, int, int], int]:
+         """Get target mapping dictionary"""
+         results = self.db_manager.select_data(
+             table_name="monitoring_targets",
+             columns=["target_id", "target_name", "vpp_stage", "micro_step"]
+         )
+         if not results:
+             return {}
+         return {
+             (row["target_name"], row["vpp_stage"], row["micro_step"]): row["target_id"]
+             for row in results
+         }
+
+     def _get_metric_id(self, metric_name: str) -> Optional[int]:
+         """Get metric ID by name"""
+         result = self.db_manager.select_data(
+             table_name="monitoring_metrics",
+             columns=["metric_id"],
+             where={"metric_name": metric_name}
+         )
+         return result[0]["metric_id"] if result else None
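
The step-partitioning arithmetic in get_metric_table_name is worth restating in isolation: each metric gets one SQLite table per step_partition_size-wide step bucket, with the bucket bounds baked into both the table name and its CHECK constraint. A standalone sketch of the same computation (not imported from the package):

# Re-statement of MonitorDB.get_metric_table_name's bucketing, for clarity.
def metric_table_name(metric_id: int, step: int, partition_size: int = 500) -> str:
    step_start = (step // partition_size) * partition_size
    step_end = step_start + partition_size - 1
    return f"metric_{metric_id}_step_{step_start}_{step_end}"

assert metric_table_name(3, 0) == "metric_3_step_0_499"
assert metric_table_name(3, 499) == "metric_3_step_0_499"
assert metric_table_name(3, 500) == "metric_3_step_500_999"
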
msprobe/core/monitor/utils.py
@@ -96,8 +96,33 @@ def validate_targets(targets):
              raise TypeError('key of targets should be module_name[str] in config.json')
          if not isinstance(field, dict):
              raise TypeError('values of targets should be cared fields e.g. {"input": "tensor"} in config.json')
+ 
  
- 
+ def validate_l2_targets(targets):
+     if not isinstance(targets, dict):
+         raise TypeError('l2_targets in config.json should be a dict')
+     for hook_name, target_list in targets.items():
+         if hook_name not in MonitorConst.L2_HOOKS:
+             raise TypeError(f'key of l2_targets must be in {MonitorConst.L2_HOOKS}, got {hook_name}')
+         if not isinstance(target_list, list):
+             raise TypeError('values of l2_targets should be a list in config.json')
+         for item in target_list:
+             if not isinstance(item, str):
+                 raise TypeError(f'item of "{hook_name}" in l2_targets should be module_name[str] in config.json')
+ 
+ 
+ def validate_recording_l2_features(recording_l2_features):
+     if not isinstance(recording_l2_features, bool):
+         raise TypeError("recording_l2_features should be a bool")
+ 
+ 
+ def validate_sa_order(sa_order):
+     if isinstance(sa_order, str):
+         sa_order = sa_order.replace(' ', '')
+     if sa_order not in MonitorConst.SA_ORDERS:
+         raise TypeError(f'sa_order must be in {MonitorConst.SA_ORDERS}, got {sa_order}')
+ 
+ 
  def validate_print_struct(print_struct):
      if not isinstance(print_struct, bool):
          raise TypeError("print_struct should be a bool")
@@ -216,6 +241,15 @@ def validate_config(config):
      targets = config.get("targets", {})
      validate_targets(targets)
  
+     l2_targets = config.get("l2_targets", {})
+     validate_l2_targets(l2_targets)
+ 
+     recording_l2_features = config.get("recording_l2_features", False)
+     validate_recording_l2_features(recording_l2_features)
+ 
+     sa_order = config.get("sa_order", "s,b,h,d")
+     validate_sa_order(sa_order)
+ 
  
      print_struct = config.get('print_struct', False)
      validate_print_struct(print_struct)
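
The three new validators correspond to three new keys in the monitor's config.json. A hedged example of a config fragment they would accept (the legal hook names and orderings live in MonitorConst.L2_HOOKS and MonitorConst.SA_ORDERS, which this diff does not show, so the literals below are assumptions):

# Illustrative monitor config fragment exercising the new keys;
# "linear" is an assumed member of MonitorConst.L2_HOOKS.
config = {
    "targets": {"module.layers.0": {"input": "tensor"}},
    "l2_targets": {"linear": ["module.layers.0.mlp"]},
    "recording_l2_features": True,
    "sa_order": "s,b,h,d",  # default applied by validate_config
}
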