PyPI - mindstudio-probe - Versions diffs - 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl - Mend

mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (213) hide show

{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/METADATA +4 -2
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/RECORD +204 -152
msprobe/README.md +32 -1
msprobe/core/__init__.py +17 -0
msprobe/core/common/const.py +120 -21
msprobe/core/common/exceptions.py +2 -2
msprobe/core/common/file_utils.py +279 -50
msprobe/core/common/framework_adapter.py +169 -0
msprobe/core/common/global_lock.py +86 -0
msprobe/core/common/runtime.py +25 -0
msprobe/core/common/utils.py +136 -45
msprobe/core/common_config.py +7 -0
msprobe/core/compare/acc_compare.py +646 -428
msprobe/core/compare/check.py +36 -103
msprobe/core/compare/compare_cli.py +4 -0
msprobe/core/compare/config.py +72 -0
msprobe/core/compare/highlight.py +215 -215
msprobe/core/compare/layer_mapping/layer_mapping.py +2 -0
msprobe/core/compare/merge_result/merge_result.py +4 -4
msprobe/core/compare/multiprocessing_compute.py +223 -110
msprobe/core/compare/npy_compare.py +2 -4
msprobe/core/compare/utils.py +214 -244
msprobe/core/config_check/__init__.py +17 -0
msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
msprobe/core/config_check/checkers/base_checker.py +60 -0
msprobe/core/config_check/checkers/dataset_checker.py +138 -0
msprobe/core/config_check/checkers/env_args_checker.py +96 -0
msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
msprobe/core/config_check/checkers/pip_checker.py +90 -0
msprobe/core/config_check/checkers/random_checker.py +367 -0
msprobe/core/config_check/checkers/weights_checker.py +147 -0
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
msprobe/core/config_check/config_check_cli.py +51 -0
msprobe/core/config_check/config_checker.py +100 -0
msprobe/{mindspore/runtime.py → core/config_check/resource/dependency.yaml} +7 -4
msprobe/core/config_check/resource/env.yaml +57 -0
msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
msprobe/core/config_check/utils/utils.py +107 -0
msprobe/core/data_dump/api_registry.py +67 -4
msprobe/core/data_dump/data_collector.py +170 -89
msprobe/core/data_dump/data_processor/base.py +72 -51
msprobe/core/data_dump/data_processor/mindspore_processor.py +109 -55
msprobe/core/data_dump/data_processor/pytorch_processor.py +90 -82
msprobe/core/data_dump/json_writer.py +143 -27
msprobe/core/debugger/precision_debugger.py +144 -0
msprobe/core/grad_probe/constant.py +1 -1
msprobe/core/grad_probe/grad_compare.py +1 -1
msprobe/core/grad_probe/utils.py +1 -1
msprobe/core/hook_manager.py +242 -0
msprobe/core/monitor/anomaly_processor.py +384 -0
msprobe/core/service.py +357 -0
msprobe/core/single_save/__init__.py +0 -0
msprobe/core/single_save/single_comparator.py +243 -0
msprobe/core/single_save/single_saver.py +146 -0
msprobe/docs/01.installation.md +6 -5
msprobe/docs/02.config_introduction.md +79 -22
msprobe/docs/03.config_examples.md +1 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +118 -49
msprobe/docs/06.data_dump_MindSpore.md +167 -20
msprobe/docs/07.accuracy_checker_PyTorch.md +2 -2
msprobe/docs/08.accuracy_checker_online_PyTorch.md +69 -9
msprobe/docs/09.accuracy_checker_MindSpore.md +18 -6
msprobe/docs/10.accuracy_compare_PyTorch.md +212 -74
msprobe/docs/11.accuracy_compare_MindSpore.md +87 -37
msprobe/docs/12.overflow_check_PyTorch.md +2 -2
msprobe/docs/13.overflow_check_MindSpore.md +2 -2
msprobe/docs/14.data_parse_PyTorch.md +3 -3
msprobe/docs/17.grad_probe.md +2 -1
msprobe/docs/18.online_dispatch.md +2 -2
msprobe/docs/19.monitor.md +90 -44
msprobe/docs/21.visualization_PyTorch.md +68 -15
msprobe/docs/22.visualization_MindSpore.md +71 -18
msprobe/docs/25.tool_function_introduction.md +23 -22
msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
msprobe/docs/27.dump_json_instruction.md +1 -1
msprobe/docs/28.debugger_save_instruction.md +111 -20
msprobe/docs/29.data_dump_MSAdapter.md +2 -2
msprobe/docs/30.overflow_check_MSAdapter.md +2 -2
msprobe/docs/31.config_check.md +95 -0
msprobe/docs/32.ckpt_compare.md +69 -0
msprobe/docs/33.generate_operator_MindSpore.md +181 -0
msprobe/docs/34.RL_collect.md +92 -0
msprobe/docs/35.nan_analyze.md +72 -0
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/mindspore/__init__.py +1 -2
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +150 -58
msprobe/mindspore/api_accuracy_checker/api_runner.py +7 -3
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +47 -69
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
msprobe/mindspore/api_accuracy_checker/compute_element.py +0 -1
msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -2
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +460 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +9 -0
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/cell_processor.py +204 -33
msprobe/mindspore/code_mapping/graph_parser.py +4 -21
msprobe/mindspore/common/const.py +17 -7
msprobe/mindspore/common/utils.py +128 -11
msprobe/mindspore/compare/common_dir_compare.py +382 -0
msprobe/mindspore/compare/distributed_compare.py +2 -26
msprobe/mindspore/compare/ms_compare.py +17 -405
msprobe/mindspore/compare/ms_graph_compare.py +14 -5
msprobe/mindspore/compare/utils.py +37 -0
msprobe/mindspore/debugger/debugger_config.py +53 -3
msprobe/mindspore/debugger/precision_debugger.py +72 -91
msprobe/mindspore/dump/cell_dump_process.py +877 -0
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +864 -0
msprobe/mindspore/dump/dump_tool_factory.py +13 -5
msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
msprobe/mindspore/dump/hook_cell/api_register.py +40 -6
msprobe/mindspore/dump/hook_cell/hook_cell.py +18 -7
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +18 -0
msprobe/mindspore/dump/jit_dump.py +21 -18
msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -15
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +12 -6
msprobe/mindspore/free_benchmark/common/utils.py +1 -1
msprobe/mindspore/grad_probe/global_context.py +7 -2
msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
msprobe/mindspore/mindspore_service.py +114 -0
msprobe/mindspore/monitor/common_func.py +52 -0
msprobe/mindspore/monitor/data_writers.py +237 -0
msprobe/mindspore/monitor/features.py +20 -7
msprobe/mindspore/monitor/module_hook.py +281 -209
msprobe/mindspore/monitor/optimizer_collect.py +334 -0
msprobe/mindspore/monitor/utils.py +25 -5
msprobe/mindspore/ms_config.py +16 -15
msprobe/mindspore/task_handler_factory.py +5 -2
msprobe/msprobe.py +19 -0
msprobe/nan_analyze/__init__.py +14 -0
msprobe/nan_analyze/analyzer.py +255 -0
msprobe/nan_analyze/graph.py +189 -0
msprobe/nan_analyze/utils.py +211 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +20 -20
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +4 -7
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +204 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +12 -11
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +1 -0
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +8 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +2 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +156 -0
msprobe/pytorch/attl_manager.py +65 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
msprobe/pytorch/common/utils.py +26 -14
msprobe/pytorch/compare/distributed_compare.py +4 -36
msprobe/pytorch/compare/pt_compare.py +13 -84
msprobe/pytorch/compare/utils.py +47 -0
msprobe/pytorch/debugger/debugger_config.py +34 -17
msprobe/pytorch/debugger/precision_debugger.py +66 -118
msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
msprobe/pytorch/dump/module_dump/module_dump.py +11 -58
msprobe/pytorch/dump/module_dump/module_processer.py +143 -113
msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
msprobe/pytorch/hook_module/api_register.py +29 -5
msprobe/pytorch/hook_module/hook_module.py +9 -18
msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +22 -1
msprobe/pytorch/hook_module/utils.py +28 -2
msprobe/pytorch/monitor/csv2tb.py +6 -2
msprobe/pytorch/monitor/data_writers.py +259 -0
msprobe/pytorch/monitor/module_hook.py +227 -158
msprobe/pytorch/monitor/module_metric.py +14 -0
msprobe/pytorch/monitor/optimizer_collect.py +242 -270
msprobe/pytorch/monitor/utils.py +16 -3
msprobe/pytorch/online_dispatch/dispatch.py +4 -2
msprobe/pytorch/online_dispatch/dump_compare.py +5 -2
msprobe/pytorch/parse_tool/lib/utils.py +3 -3
msprobe/pytorch/pt_config.py +8 -7
msprobe/pytorch/pytorch_service.py +73 -0
msprobe/visualization/builder/graph_builder.py +33 -13
msprobe/visualization/builder/msprobe_adapter.py +24 -11
msprobe/visualization/compare/graph_comparator.py +53 -45
msprobe/visualization/compare/mode_adapter.py +31 -1
msprobe/visualization/graph/base_node.py +3 -3
msprobe/visualization/graph/graph.py +2 -2
msprobe/visualization/graph_service.py +250 -103
msprobe/visualization/utils.py +27 -11
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -106
msprobe/mindspore/monitor/anomaly_detect.py +0 -404
msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
msprobe/mindspore/service.py +0 -549
msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
msprobe/pytorch/monitor/anomaly_detect.py +0 -410
msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
msprobe/pytorch/service.py +0 -473
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
/msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
/msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0

msprobe/core/config_check/checkers/dataset_checker.py ADDED Viewed

@@ -0,0 +1,138 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+import pandas as pd
+from msprobe.core.common.file_utils import create_file_in_zip, load_json
+from msprobe.core.config_check.checkers.base_checker import BaseChecker
+from msprobe.core.config_check.config_checker import register_checker_item, register_pre_forward_fun_list
+from msprobe.core.config_check.utils.utils import config_checking_print, get_tensor_features
+from msprobe.core.common.decorator import recursion_depth_decorator
+from msprobe.core.common.framework_adapter import FmkAdp
+@recursion_depth_decorator("config_check: process_obj")
+def process_obj(obj):
+    """Recursively replace every tensor inside ``obj`` with its feature summary.
+    Tensors (as decided by ``FmkAdp.is_tensor``) become ``get_tensor_features(obj)``.
+    Tuples and lists become dicts keyed by position, dicts keep their keys, and
+    anything else is replaced by an empty string.
+    """
+    if FmkAdp.is_tensor(obj):
+        return get_tensor_features(obj)
+    if isinstance(obj, dict):
+        children = obj.items()
+    elif isinstance(obj, (tuple, list)):
+        # Sequences are keyed by element index.
+        children = enumerate(obj)
+    else:
+        return ""
+    return {name: process_obj(child) for name, child in children}
+def parse_args_and_kargs(args, kwargs):
+    """Summarise positional and keyword inputs with ``process_obj``.
+    Returns a dict with the two keys ``'args'`` and ``'kwargs'``.
+    """
+    return {'args': process_obj(args), 'kwargs': process_obj(kwargs)}
+# Keys that identify a leaf "statistics" dict in the compared structures.
+_DATASET_STAT_KEYS = ('max', 'min', 'mean', 'norm')
+def _stat_relative_diff(base, other):
+    """Return ``|base - other| / |base|``, or None when it cannot be computed.
+    None is returned for a zero baseline and for operands that do not support
+    the arithmetic (for example a statistic missing on one side).
+    """
+    try:
+        if base == 0:
+            return None
+        # Divide by the magnitude so a negative baseline does not flip the sign.
+        return abs(base - other) / abs(base)
+    except TypeError:
+        return None
+@recursion_depth_decorator("config_check: compare_dataset_dicts")
+def compare_dataset_dicts(dict1, dict2, tag=''):
+    """Recursively compare two nested dicts of dataset statistics.
+    Args:
+        dict1: benchmark structure (nested dicts ending in statistics dicts).
+        dict2: structure to compare against ``dict1``.
+        tag: dotted path of the keys walked so far; '' at the top level.
+    Returns:
+        A list of result dicts, each with 'tag', 'equal' and 'status'. Entries for
+        statistics dicts (keys exactly max/min/mean/norm) also carry one
+        ``<stat>_relative_diff`` value per statistic.
+    """
+    results = []
+    # Walk the keys of dict1.
+    for key, value1 in dict1.items():
+        new_tag = f"{tag}.{key}" if tag else key
+        if key not in dict2:
+            results.append({'tag': new_tag, 'equal': False, 'status': 'delete'})
+            continue
+        value2 = dict2[key]
+        value2_is_dict = isinstance(value2, dict)
+        if not isinstance(value1, dict):
+            # A plain value on the benchmark side that became a structure on the
+            # compared side is a real difference, not something to skip silently.
+            if value2_is_dict:
+                results.append({'tag': new_tag, 'equal': False, 'status': 'changed'})
+            continue
+        if set(value1.keys()) == set(_DATASET_STAT_KEYS):
+            result = {'tag': new_tag, 'equal': value1 == value2, 'status': 'unchanged'}
+            for stat in _DATASET_STAT_KEYS:
+                # The compared side may be a non-dict or lack a statistic; report
+                # None for the diff instead of raising.
+                other = value2.get(stat) if value2_is_dict else None
+                result[f"{stat}_relative_diff"] = _stat_relative_diff(value1[stat], other)
+            results.append(result)
+        elif value2_is_dict:
+            results.extend(compare_dataset_dicts(value1, value2, new_tag))
+        else:
+            # Nested structure on the benchmark side, plain value on the other.
+            results.append({'tag': new_tag, 'equal': False, 'status': 'changed'})
+    # Report the keys that only exist in dict2.
+    for key in dict2:
+        if key not in dict1:
+            new_tag = f"{tag}.{key}" if tag else key
+            results.append({'tag': new_tag, 'equal': False, 'status': 'added'})
+    return results
+def compare_dataset(bench_dir, cmp_dir):
+    """Compare each ``<step>/<rank>/dataset.json`` present under both directories.
+    Step entries of ``bench_dir`` that are not directories are skipped, as are
+    step/rank pairs whose ``dataset.json`` is missing on either side. Every
+    comparison result is tagged with its numeric step and rank.
+    Returns:
+        A DataFrame with the ``DatasetChecker.result_header`` columns, sorted by
+        step and then rank.
+    """
+    rows = []
+    for step_name in os.listdir(bench_dir):
+        bench_step_dir = os.path.join(bench_dir, step_name)
+        if not os.path.isdir(bench_step_dir):
+            continue
+        cmp_step_dir = os.path.join(cmp_dir, step_name)
+        for rank_name in os.listdir(bench_step_dir):
+            bench_json = os.path.join(bench_step_dir, rank_name, 'dataset.json')
+            cmp_json = os.path.join(cmp_step_dir, rank_name, 'dataset.json')
+            if not (os.path.isfile(bench_json) and os.path.isfile(cmp_json)):
+                continue
+            bench_features = load_json(bench_json)
+            cmp_features = load_json(cmp_json)
+            pair_rows = compare_dataset_dicts(bench_features, cmp_features)
+            for row in pair_rows:
+                # Directory names look like "step<N>" / "rank<N>".
+                row['step'] = int(step_name.replace("step", ""))
+                row['rank'] = int(rank_name.replace("rank", ""))
+            rows.extend(pair_rows)
+    frame = pd.DataFrame(rows, columns=DatasetChecker.result_header)
+    return frame.sort_values(by=['step', 'rank'], ascending=[True, True])
+@register_checker_item("dataset")
+class DatasetChecker(BaseChecker):
+    """Checker that records model input features and compares them across runs."""
+    input_needed = "model"
+    multi_rank = True
+    # Folder name used for this checker's files inside the zip / unpacked directory.
+    target_name_in_zip = "dataset"
+    result_header = ['step', 'rank', 'tag', 'equal', 'max_relative_diff',
+                     'min_relative_diff', 'mean_relative_diff', 'norm_relative_diff']
+    @staticmethod
+    def pack(pack_input):
+        """Register a pre-forward callback that stores input features in the zip."""
+        zip_path = pack_input.output_zip_path
+        def collect_input(model, args, kwargs, step):
+            # One dataset.json per step and rank: dataset/step<N>/rank<M>/dataset.json
+            summary = parse_args_and_kargs(args, kwargs)
+            path_in_zip = os.path.join(
+                DatasetChecker.target_name_in_zip,
+                f"step{step}",
+                f"rank{FmkAdp.get_rank_id()}",
+                "dataset.json",
+            )
+            create_file_in_zip(zip_path, path_in_zip, json.dumps(summary, indent=4))
+            config_checking_print("add first dataset input features to zip")
+        register_pre_forward_fun_list(collect_input)
+    @staticmethod
+    def compare(bench_dir, cmp_dir, output_path, fmk):
+        """Compare the dataset folders of two unpacked results.
+        Returns a ``(checker name, pass flag, result DataFrame)`` tuple; the check
+        passes when no row has ``equal`` set to False.
+        """
+        folder = DatasetChecker.target_name_in_zip
+        df = compare_dataset(os.path.join(bench_dir, folder), os.path.join(cmp_dir, folder))
+        passed = False not in df['equal'].values
+        return folder, passed, df

msprobe/core/config_check/checkers/env_args_checker.py ADDED Viewed

@@ -0,0 +1,96 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+import pandas as pd
+from msprobe.core.common.file_utils import load_json, load_yaml, create_file_with_content, create_file_in_zip
+from msprobe.core.config_check.checkers.base_checker import BaseChecker
+from msprobe.core.config_check.config_checker import register_checker_item
+from msprobe.core.config_check.utils.utils import config_checking_print
+from msprobe.core.common.const import Const
+# Directory that contains this module; the resource file is located relative to it.
+dirpath = os.path.dirname(__file__)
+# YAML file loaded by compare_env_data to decide which environment variables to compare.
+env_yaml_path = os.path.join(dirpath, "../resource/env.yaml")
+def collect_env_data():
+    """Return a plain-dict snapshot of the current process environment."""
+    return dict(os.environ)
+def get_device_type(env_json):
+    """Classify an environment snapshot by its variable names.
+    Returns ``Const.NPU_LOWERCASE`` when any variable name contains
+    ``Const.ASCEND``, otherwise ``Const.GPU_LOWERCASE``.
+    """
+    has_ascend_var = any(Const.ASCEND in name for name in env_json.keys())
+    return Const.NPU_LOWERCASE if has_ascend_var else Const.GPU_LOWERCASE
+def compare_env_data(npu_path, bench_path):
+    """Compare the environment variables listed in env.yaml between two snapshots.
+    Args:
+        npu_path: JSON environment snapshot of the run under comparison ("cmp").
+        bench_path: JSON environment snapshot of the benchmark run.
+    Returns:
+        A DataFrame with the ``EnvArgsChecker.result_header`` columns. A variable
+        whose values differ is reported at level "error"; a variable that is only
+        defined for one side's device type is reported at level "warning".
+    """
+    necessary_env = load_yaml(env_yaml_path)
+    cmp_data = load_json(npu_path)
+    cmp_type = get_device_type(cmp_data)
+    bench_data = load_json(bench_path)
+    bench_type = get_device_type(bench_data)
+    data = []
+    for value in necessary_env.values():
+        # Each yaml entry may describe the variable separately per device type.
+        cmp_env = value.get(cmp_type)
+        bench_env = value.get(bench_type)
+        if not bench_env and not cmp_env:
+            continue
+        if not cmp_env:
+            bench_env_name = bench_env["name"]
+            # Fall back to the default only when the variable is absent, exactly as
+            # the other branches do; an explicitly empty value is kept as-is.
+            bench_value = bench_data.get(bench_env_name, bench_env["default_value"])
+            data.append([bench_env_name, "only bench has this env", bench_value, "", "warning"])
+            continue
+        cmp_env_name = cmp_env["name"]
+        cmp_value = cmp_data.get(cmp_env_name, cmp_env["default_value"])
+        if not bench_env:
+            data.append(["only cmp has this env", cmp_env_name, "", cmp_value, "warning"])
+            continue
+        bench_env_name = bench_env["name"]
+        bench_value = bench_data.get(bench_env_name, bench_env["default_value"])
+        if cmp_value != bench_value:
+            data.append([bench_env_name, cmp_env_name, bench_value, cmp_value, "error"])
+    df = pd.DataFrame(data, columns=EnvArgsChecker.result_header)
+    return df
+@register_checker_item("env")
+class EnvArgsChecker(BaseChecker):
+    """Checker that snapshots environment variables and compares two snapshots."""
+    # Name of the snapshot file inside the zip / unpacked directory.
+    target_name_in_zip = "env"
+    result_header = ["bench_env_name", "cmp_env_name", "bench_value", "cmp_value", "level"]
+    @staticmethod
+    def pack(pack_input):
+        """Write the current environment variables, as JSON, into the output zip."""
+        output_zip_path = pack_input.output_zip_path
+        env_args_dict = collect_env_data()
+        create_file_in_zip(output_zip_path, EnvArgsChecker.target_name_in_zip, json.dumps(env_args_dict, indent=4))
+        config_checking_print("add env args to zip")
+    @staticmethod
+    def compare(bench_dir, cmp_dir, output_path, fmk):
+        """Compare the environment snapshots of two unpacked results.
+        Returns a ``(checker name, pass flag, result DataFrame)`` tuple; the check
+        passes when no row is at level "error".
+        """
+        bench_env_data = os.path.join(bench_dir, EnvArgsChecker.target_name_in_zip)
+        cmp_env_data = os.path.join(cmp_dir, EnvArgsChecker.target_name_in_zip)
+        # compare_env_data(npu_path, bench_path) expects the compared snapshot first
+        # and the benchmark second; passing them the other way round swaps the
+        # bench/cmp columns of the report.
+        df = compare_env_data(cmp_env_data, bench_env_data)
+        pass_check = "error" not in df['level'].values
+        return EnvArgsChecker.target_name_in_zip, pass_check, df

msprobe/core/config_check/checkers/hyperparameter_checker.py ADDED Viewed

@@ -0,0 +1,170 @@
+# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import json
+from difflib import SequenceMatcher
+from typing import Union, List, Dict, Any
+import pandas as pd
+from msprobe.core.config_check.checkers.base_checker import BaseChecker
+from msprobe.core.config_check.config_checker import register_checker_item
+from msprobe.core.config_check.utils.utils import compare_dict, config_checking_print, update_dict
+from msprobe.core.config_check.utils.hyperparameter_parser import ParserFactory
+from msprobe.core.common.file_utils import (os_walk_for_files, create_file_in_zip, load_json, create_file_with_list,
+                                            FileOpen, load_yaml)
+from msprobe.core.common.const import Const
+# Directory that contains this module; the resource file is located relative to it.
+dirpath = os.path.dirname(__file__)
+hyperparameters_path = os.path.join(dirpath, "../resource/hyperparameter.yaml")
+# Mapping loaded once at import time; _fuzzy_match_parameter iterates it as
+# {standard_name: aliases}.
+parameter_name_mapping = load_yaml(os.path.realpath(hyperparameters_path))
+# Module-level holder for run-time hyperparameters. It starts empty and is rebound
+# by the wrapper installed in HyperparameterChecker.apply_patches to the first
+# result returned by the patched get_args.
+hyperparameters_dict = {}
+@register_checker_item("hyperparameter")
+class HyperparameterChecker(BaseChecker):
+    """Checker that collects hyperparameters and compares them between two runs.
+    "Static" hyperparameters are parsed from launch scripts; "dynamic" ones are
+    captured at run time through the patch installed by ``apply_patches``.
+    """
+    # Folder name used for this checker's files inside the zip / unpacked directory.
+    target_name_in_zip = "hyperparameters"
+    result_header = ["file_name", "bench_para", "cmp_para", "bench_value", "cmp_value", "matched_with", "level"]
+    # Index 0 holds the static (script) file name, index 1 the dynamic one.
+    hyperparameters_file_list = ["hyperparameters_static.json", "hyperparameters_dynamic.json"]
+    @staticmethod
+    def pack(pack_input):
+        """Store static and/or dynamic hyperparameters as JSON files in the zip.
+        Raises:
+            TypeError: if ``pack_input.shell_path`` is set but is not a list.
+        """
+        shell_path = pack_input.shell_path
+        output_zip_path = pack_input.output_zip_path
+        if shell_path:
+            if not isinstance(shell_path, list):
+                raise TypeError("shell_path should be a list of file paths.")
+            hyperparameters = {}
+            parser_factory = ParserFactory()
+            for script_path in shell_path:
+                if os.path.isfile(script_path):
+                    # The parser is chosen from the script's file extension.
+                    parser = parser_factory.get_parser(os.path.splitext(script_path)[1])
+                    update_dict(hyperparameters, parser.run(os.path.realpath(script_path)))
+                else:
+                    config_checking_print(f"Warning: Script path {script_path} is not a file.")
+            if hyperparameters:
+                create_file_in_zip(output_zip_path,
+                                   os.path.join(HyperparameterChecker.target_name_in_zip,
+                                                HyperparameterChecker.hyperparameters_file_list[0]),
+                                   json.dumps(hyperparameters, indent=4))
+                config_checking_print("add static hyperparameters args to zip")
+            else:
+                config_checking_print(f"Warning: Failed to extract hyperparameters from script {shell_path}")
+        if hyperparameters_dict:
+            # Values json cannot serialise are written as null (default returns None).
+            create_file_in_zip(output_zip_path,
+                               os.path.join(HyperparameterChecker.target_name_in_zip,
+                                            HyperparameterChecker.hyperparameters_file_list[1]),
+                               json.dumps(vars(hyperparameters_dict), default=lambda x: None, indent=4))
+            config_checking_print("add dynamic hyperparameters args to zip")
+    @staticmethod
+    def compare(bench_dir, cmp_dir, output_path, fmk):
+        """Compare every hyperparameter file that exists on both sides.
+        Returns a ``(checker name, pass flag, result DataFrame)`` tuple; the check
+        passes when no row is at level "error".
+        """
+        all_diffs = []
+        for file_name in HyperparameterChecker.hyperparameters_file_list:
+            bench_model_dir = os.path.join(bench_dir, HyperparameterChecker.target_name_in_zip, file_name)
+            cmp_model_dir = os.path.join(cmp_dir, HyperparameterChecker.target_name_in_zip, file_name)
+            if os.path.isfile(bench_model_dir) and os.path.isfile(cmp_model_dir):
+                bench_hyperparameters = load_json(bench_model_dir)
+                cmp_hyperparameters = load_json(cmp_model_dir)
+                all_diffs.extend(
+                    HyperparameterChecker.compare_param(bench_hyperparameters, cmp_hyperparameters, file_name))
+        df = pd.DataFrame(all_diffs, columns=HyperparameterChecker.result_header)
+        pass_check = "error" not in df["level"].values
+        return HyperparameterChecker.target_name_in_zip, pass_check, df
+    @staticmethod
+    def compare_param(bench_params, cmp_params, file_name):
+        """Return the sorted difference rows between two parameter dicts.
+        Each benchmark parameter is matched against the not-yet-matched comparison
+        parameters. Differing values give an "error" row; parameters found on one
+        side only give a "warning" row. ``cmp_params`` is left unmodified.
+        """
+        all_diffs = []
+        # Work on a copy so matched names can be removed without mutating the
+        # caller's dict.
+        unmatched_cmp = dict(cmp_params)
+        for bench_param_name, bench_param_value in bench_params.items():
+            matched_cmp_param_name, matched_with = HyperparameterChecker._fuzzy_match_parameter(bench_param_name,
+                                                                                                unmatched_cmp)
+            if matched_cmp_param_name:
+                cmp_param_value = unmatched_cmp.pop(matched_cmp_param_name)
+                if bench_param_value != cmp_param_value:
+                    all_diffs.append(
+                        [file_name, bench_param_name, matched_cmp_param_name, bench_param_value, cmp_param_value,
+                         matched_with, "error"])
+            else:
+                all_diffs.append(
+                    [file_name, bench_param_name, "Only in benchmark", bench_param_value, "", "", "warning"])
+        for cmp_param_name, cmp_param_value in unmatched_cmp.items():
+            all_diffs.append([file_name, "Only in comparison", cmp_param_name, "", cmp_param_value, "", "warning"])
+        # The first three columns are unique per row; sorting on them alone gives the
+        # same order and never compares parameter values of unrelated types.
+        all_diffs.sort(key=lambda row: row[:3])
+        return all_diffs
+    @staticmethod
+    def apply_patches(fmk):
+        """Wrap ``megatron.training.get_args`` so its first result is remembered.
+        Failures are reported through ``config_checking_print`` and never raised.
+        """
+        import functools
+        try:
+            from megatron import training
+            def collect_hyperparameter_wrapper(func):
+                # functools.wraps keeps the name and docstring of the wrapped function.
+                @functools.wraps(func)
+                def wrapper(*args, **kwargs):
+                    global hyperparameters_dict
+                    result = func(*args, **kwargs)
+                    if not hyperparameters_dict:
+                        hyperparameters_dict = result
+                    return result
+                return wrapper
+            training.get_args = collect_hyperparameter_wrapper(training.get_args)
+        except ImportError:
+            config_checking_print("No megatron find.")
+        except Exception as e:
+            config_checking_print(f"Patch megatron method failed, detail:{str(e)}")
+    @staticmethod
+    def _fuzzy_match_parameter(param_name: str, available_params: Dict[str, Any]):
+        """
+        Fuzzy matches a parameter name against available parameter names using predefined
+        mappings and string similarity.
+        Matching is tried in order: exact name, the standard name or an alias from
+        ``parameter_name_mapping``, then the most similar name with a
+        case-insensitive SequenceMatcher ratio above 0.8.
+        Returns ``(matched_name, match_mode)``, or ``(None, None)`` without a match.
+        """
+        if param_name in available_params:
+            return param_name, Const.MATCH_MODE_NAME
+        canonical_name = None
+        for standard_name, aliases in parameter_name_mapping.items():
+            if param_name == standard_name or param_name in aliases:
+                canonical_name = standard_name
+                break
+        if canonical_name:
+            if canonical_name in available_params:
+                return canonical_name, Const.MATCH_MODE_MAPPING
+            for alias in parameter_name_mapping[canonical_name]:
+                if alias in available_params:
+                    config_checking_print(
+                        f"Matched '{param_name}' to alias '{alias}' via canonical name '{canonical_name}'")
+                    return alias, Const.MATCH_MODE_MAPPING
+        best_match_name = None
+        # Doubles as the minimum similarity a candidate has to exceed.
+        best_match_ratio = 0.8
+        for available_param_name in available_params:
+            ratio = SequenceMatcher(None, param_name.lower(), available_param_name.lower()).ratio()
+            if ratio > best_match_ratio:
+                best_match_ratio = ratio
+                best_match_name = available_param_name
+        if best_match_name:
+            config_checking_print(
+                f"Fuzzy matched parameter '{param_name}' to '{best_match_name}' (similarity: {best_match_ratio:.2f})")
+            return best_match_name, f"{Const.MATCH_MODE_SIMILARITY}:{best_match_ratio}"
+        return None, None

msprobe/core/config_check/checkers/pip_checker.py ADDED Viewed

@@ -0,0 +1,90 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import pandas as pd
+try:
+    import importlib.metadata as metadata
+except ImportError:
+    import importlib_metadata as metadata
+from msprobe.core.common.file_utils import load_yaml, create_file_in_zip
+from msprobe.core.config_check.checkers.base_checker import BaseChecker
+from msprobe.core.config_check.config_checker import register_checker_item
+from msprobe.core.config_check.utils.utils import config_checking_print
+from msprobe.core.common.file_utils import FileOpen, save_excel
+# Directory that contains this module; the resource file is located relative to it.
+dirpath = os.path.dirname(__file__)
+# YAML file whose "dependency" list names the packages compare_pip_data checks.
+depend_path = os.path.join(dirpath, "../resource/dependency.yaml")
+def load_pip_txt(file_path):
+    """Parse a ``name=version`` listing into a ``{name: version}`` dict.
+    A line that does not split into exactly two parts on "=" is stored with an
+    empty version string.
+    """
+    versions = {}
+    with FileOpen(file_path, 'r', encoding='utf-8') as file:
+        for raw_line in file.readlines():
+            name, *rest = raw_line.strip().split("=")
+            versions[name] = rest[0] if len(rest) == 1 else ""
+    return versions
+def collect_pip_data():
+    """Return one ``name=version`` line per installed distribution with metadata."""
+    lines = [
+        f"{dist.metadata.get('Name')}={dist.version}\n"
+        for dist in metadata.distributions()
+        if dist.metadata
+    ]
+    return "".join(lines)
+def compare_pip_data(bench_pip_path, cmp_pip_path, fmk):
+    """Compare the versions of the required packages between two pip listings.
+    The packages checked are the "dependency" list from dependency.yaml plus
+    ``fmk``. Every package whose recorded versions differ yields an "error" row; a
+    missing or empty version is shown as the string 'None'.
+    Returns a DataFrame with the ``PipPackageChecker.result_header`` columns.
+    """
+    necessary_dependency = load_yaml(depend_path)["dependency"]
+    necessary_dependency.append(fmk)
+    bench_data = load_pip_txt(bench_pip_path)
+    cmp_data = load_pip_txt(cmp_pip_path)
+    rows = []
+    for package in necessary_dependency:
+        bench_version, cmp_version = bench_data.get(package), cmp_data.get(package)
+        if bench_version == cmp_version:
+            continue
+        rows.append([package, bench_version or 'None', cmp_version or 'None', "error"])
+    return pd.DataFrame(rows, columns=PipPackageChecker.result_header)
+@register_checker_item("pip")
+class PipPackageChecker(BaseChecker):
+    """Checker that records installed package versions and compares two listings."""
+    # Name of the listing file inside the zip / unpacked directory.
+    target_name_in_zip = "pip"
+    result_header = ['package', 'bench version', 'cmp version', 'level']
+    @staticmethod
+    def pack(pack_input):
+        """Write the installed-package listing into the output zip."""
+        create_file_in_zip(pack_input.output_zip_path, PipPackageChecker.target_name_in_zip, collect_pip_data())
+        config_checking_print("add pip info to zip")
+    @staticmethod
+    def compare(bench_dir, cmp_dir, output_path, fmk):
+        """Compare the pip listings of two unpacked results.
+        Returns a ``(checker name, pass flag, result DataFrame)`` tuple; the check
+        passes when no row is at level "error".
+        """
+        listing_name = PipPackageChecker.target_name_in_zip
+        df = compare_pip_data(os.path.join(bench_dir, listing_name), os.path.join(cmp_dir, listing_name), fmk)
+        passed = "error" not in df['level'].values
+        return listing_name, passed, df

mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl

mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl