algobench-sdk 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- algobench/__init__.py +25 -0
- algobench/api.py +123 -0
- algobench/cli.py +181 -0
- algobench/config/__init__.py +0 -0
- algobench/config/analysis_criteria.py +79 -0
- algobench/config/business.py +37 -0
- algobench/config/metric_keywords.py +108 -0
- algobench/config/metric_status.py +56 -0
- algobench/config/metrics.py +58 -0
- algobench/config/thresholds.py +34 -0
- algobench/decision/__init__.py +0 -0
- algobench/decision/engine.py +239 -0
- algobench/decision/evaluator.py +230 -0
- algobench/exceptions.py +23 -0
- algobench/models.py +198 -0
- algobench/parsers/__init__.py +0 -0
- algobench/parsers/csv_parser.py +119 -0
- algobench/stats/__init__.py +0 -0
- algobench/stats/core.py +199 -0
- algobench/stats/diagnosis.py +147 -0
- algobench/stats/qvalue.py +30 -0
- algobench/stats/sample_processing.py +93 -0
- algobench/stats/tests/__init__.py +0 -0
- algobench/stats/tests/data_type.py +88 -0
- algobench/stats/tests/effect_size.py +91 -0
- algobench/stats/tests/nonparametric.py +73 -0
- algobench/stats/tests/smart.py +235 -0
- algobench/stats/tests/t_tests.py +110 -0
- algobench/utils/__init__.py +0 -0
- algobench/utils/decision_logic.py +32 -0
- algobench/utils/improvement.py +70 -0
- algobench/utils/math_utils.py +97 -0
- algobench/utils/numbers.py +40 -0
- algobench/utils/quality_level.py +20 -0
- algobench_sdk-1.0.0.dist-info/METADATA +114 -0
- algobench_sdk-1.0.0.dist-info/RECORD +39 -0
- algobench_sdk-1.0.0.dist-info/WHEEL +5 -0
- algobench_sdk-1.0.0.dist-info/entry_points.txt +2 -0
- algobench_sdk-1.0.0.dist-info/top_level.txt +1 -0
algobench/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""AlgoBench - 算法基准测试分析工具的 Python SDK 与 CLI。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__version__ = "1.0.0"
|
|
6
|
+
|
|
7
|
+
from algobench.api import analyze, compute_statistics, make_decision
|
|
8
|
+
from algobench.models import (
|
|
9
|
+
AnalysisResult,
|
|
10
|
+
Decision,
|
|
11
|
+
EvaluationSummary,
|
|
12
|
+
MetricEvaluation,
|
|
13
|
+
StatisticsResult,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"analyze",
|
|
18
|
+
"compute_statistics",
|
|
19
|
+
"make_decision",
|
|
20
|
+
"AnalysisResult",
|
|
21
|
+
"Decision",
|
|
22
|
+
"EvaluationSummary",
|
|
23
|
+
"MetricEvaluation",
|
|
24
|
+
"StatisticsResult",
|
|
25
|
+
]
|
algobench/api.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""AlgoBench 顶层 API,对应前端的一站式分析流程。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from algobench.models import AnalysisResult, StatisticsResult, EvaluationSummary, Decision, MetricEvaluation
|
|
6
|
+
from algobench.parsers.csv_parser import parse_csv, parse_csv_file
|
|
7
|
+
from algobench.config.metrics import get_metric_config, get_better_direction
|
|
8
|
+
from algobench.stats.core import compute_statistics
|
|
9
|
+
from algobench.stats.diagnosis import check_data_quality
|
|
10
|
+
from algobench.decision.engine import build_evaluation_summary
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def analyze(
    input_source: str,
    baseline: str = "Baseline",
    compare: str = "New",
    metrics: list[str] | None = None,
    selected_cases: list[str] | None = None,
    criteria_id: str = "standard",
    custom_thresholds: dict | None = None,
    metric_configs: dict | None = None,
) -> AnalysisResult:
    """Run the whole pipeline in one call: parse, compute statistics, decide.

    Args:
        input_source: A string ending in ".csv" is handed to ``parse_csv_file``
            as a path; any other string is handed to ``parse_csv`` as CSV text.
        baseline: Name of the baseline algorithm.
        compare: Name of the algorithm compared against the baseline.
        metrics: Metrics to analyse. ``None`` (or an empty list) means every
            metric found by the parser; names the parser did not find are
            dropped.
        selected_cases: Cases to analyse, ``None`` for all of them.
        criteria_id: ID of the analysis-criteria preset.
        custom_thresholds: Threshold overrides forwarded to the decision step.
        metric_configs: Per-metric user configuration.

    Returns:
        An ``AnalysisResult``. When the parser yields no rows or fewer than two
        algorithms, only ``parsed`` is populated.
    """
    # 1. Parse: pick the loader from the shape of the input string.
    loader = parse_csv_file if input_source.endswith(".csv") else parse_csv
    parsed = loader(input_source, selected_cases)

    rows = parsed["data"]
    algo_names = parsed["algos"]
    known_metrics = parsed["metrics"]

    # Nothing to compare without rows and at least two algorithms.
    if not rows or len(algo_names) < 2:
        return AnalysisResult(parsed=parsed)

    # Keep only the requested metrics that actually exist in the data.
    requested = metrics or known_metrics
    target_metrics = [name for name in requested if name in known_metrics]

    # 2. Data-quality check.
    quality = check_data_quality(rows, algo_names, target_metrics)

    # 3. Per-metric statistics.
    statistics_results: dict[str, StatisticsResult] = {
        name: compute_statistics(
            rows,
            name,
            baseline,
            compare,
            get_better_direction(name, metric_configs),
            selected_cases,
        )
        for name in target_metrics
    }

    # 4. Evaluation summary (carries the decision fields).
    evaluation = build_evaluation_summary(
        stats_results=statistics_results,
        metric_configs=metric_configs,
        criteria_id=criteria_id,
        custom_thresholds=custom_thresholds,
        n_selected_cases=len(rows),
        dataset_rows=parsed.get("filteredCases", 0) + len(rows),
    )

    # 5. Lift the decision fields into their own object.
    decision = Decision(
        status=evaluation.decision_status,
        label=evaluation.decision_label,
        reason=evaluation.decision_reason,
        blockers=evaluation.blockers,
        recommendations=evaluation.recommendations,
    )

    return AnalysisResult(
        parsed=parsed,
        statistics=statistics_results,
        quality=quality,
        evaluation=evaluation,
        decision=decision,
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def compute_statistics_only(
    input_source: str,
    metric: str,
    baseline: str = "Baseline",
    compare: str = "New",
    selected_cases: list[str] | None = None,
    metric_configs: dict | None = None,
) -> StatisticsResult:
    """Parse the input and compute statistics for a single metric.

    ``input_source`` is treated as a file path when it ends in ".csv" and as
    CSV text otherwise.
    """
    loader = parse_csv_file if input_source.endswith(".csv") else parse_csv
    rows = loader(input_source, selected_cases)["data"]

    better = get_better_direction(metric, metric_configs)
    return compute_statistics(rows, metric, baseline, compare, better, selected_cases)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def make_decision(
    stats_results: dict[str, StatisticsResult],
    metric_configs: dict | None = None,
    criteria_id: str = "standard",
    custom_thresholds: dict | None = None,
) -> EvaluationSummary:
    """Build an evaluation summary (decision included) from existing statistics.

    Thin wrapper that forwards its arguments to ``build_evaluation_summary``.
    """
    summary = build_evaluation_summary(
        stats_results=stats_results,
        metric_configs=metric_configs,
        criteria_id=criteria_id,
        custom_thresholds=custom_thresholds,
    )
    return summary
|
algobench/cli.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""AlgoBench CLI 入口。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from dataclasses import asdict
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _serialize(obj):
    """Recursively convert dataclass instances into plain dicts.

    Lists, tuples and dicts are walked so that dataclass instances nested
    anywhere inside them are converted too. Tuples stay tuples. Any other
    value, including a dataclass *class*, is returned unchanged.
    """
    # Dataclass classes also carry __dataclass_fields__, but asdict() only
    # accepts instances, so classes are excluded explicitly.
    if hasattr(obj, "__dataclass_fields__") and not isinstance(obj, type):
        return {k: _serialize(v) for k, v in asdict(obj).items()}
    if isinstance(obj, list):
        return [_serialize(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(_serialize(item) for item in obj)
    if isinstance(obj, dict):
        return {k: _serialize(v) for k, v in obj.items()}
    return obj
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def cmd_analyze(args):
    """Handle the ``analyze`` subcommand: run the full pipeline and print it.

    ``args.metrics`` is a comma-separated list. Entries are stripped of
    surrounding whitespace and empty entries are dropped, so ``"a, b"`` and
    ``"a,b,"`` both select metrics ``a`` and ``b``.
    """
    from algobench.api import analyze

    metrics = None
    if args.metrics:
        # Without stripping, " b" would never match a parsed metric name and
        # would be discarded silently by analyze().
        metrics = [name.strip() for name in args.metrics.split(",") if name.strip()]

    result = analyze(
        input_source=args.input,
        baseline=args.baseline,
        compare=args.compare,
        metrics=metrics,
        criteria_id=args.criteria,
    )
    output = {
        "decision": _serialize(result.decision),
        "statistics": {k: _serialize(v) for k, v in result.statistics.items()},
        "quality": result.quality,
        "evaluation": _serialize(result.evaluation),
    }
    _print_output(output, args.format)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def cmd_stats(args):
    """Handle the ``stats`` subcommand: single-metric statistics."""
    from algobench.api import compute_statistics_only

    stats = compute_statistics_only(
        input_source=args.input,
        metric=args.metric,
        baseline=args.baseline,
        compare=args.compare,
    )
    serialized = _serialize(stats)
    _print_output(serialized, args.format)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def cmd_quality(args):
    """Handle the ``quality`` subcommand: parse the file and print the quality report."""
    from algobench.parsers.csv_parser import parse_csv_file
    from algobench.stats.diagnosis import check_data_quality

    table = parse_csv_file(args.input)
    report = check_data_quality(table["data"], table["algos"], table["metrics"])
    _print_output(report, args.format)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def cmd_diagnose(args):
    """Handle the ``diagnose`` subcommand: parse the file and print diagnosed issues."""
    from algobench.parsers.csv_parser import parse_csv_file
    from algobench.stats.diagnosis import diagnose_data_issues

    table = parse_csv_file(args.input)
    found = diagnose_data_issues(table["data"], table["algos"], table["metrics"])
    _print_output(found, args.format)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def cmd_validate(args):
    """Handle the ``validate`` subcommand: print a summary of what the parser found.

    The file counts as valid when it has at least one data row and at least
    two algorithms.
    """
    from algobench.parsers.csv_parser import parse_csv_file

    parsed = parse_csv_file(args.input)
    is_valid = bool(parsed["data"] and len(parsed["algos"]) >= 2)
    summary = {
        "valid": is_valid,
        "total_cases": len(parsed["data"]),
        "algos": parsed["algos"],
        "metrics": parsed["metrics"],
        "meta_columns": parsed["metaColumns"],
        "param_columns": parsed["paramColumns"],
    }
    _print_output(summary, args.format)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _print_output(data, fmt: str = "json"):
    """Print ``data`` in the requested format.

    ``"table"`` delegates to ``_print_table``; ``"json"`` and any other value
    produce indented JSON with non-ASCII characters kept as-is.
    """
    if fmt == "table":
        _print_table(data)
        return
    rendered = json.dumps(data, indent=2, ensure_ascii=False)
    print(rendered)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _print_table(data):
    """Print a human-readable rendering of an analysis or quality result.

    Three shapes are recognised:
    * a dict with a "decision" key: decision, blockers, recommendations and,
      if a "statistics" key is present, one line per metric;
    * a dict with a "score" key: quality score, level and issues;
    * anything else: indented JSON.
    """
    looks_like_dict = isinstance(data, dict)

    # Fallback: unknown shape, dump as JSON.
    if not looks_like_dict or ("decision" not in data and "score" not in data):
        print(json.dumps(data, indent=2, ensure_ascii=False))
        return

    # Quality report (only reached when there is no "decision" key).
    if "decision" not in data:
        print(f"数据质量: {data['score']}分 ({data['level']})")
        for issue in data.get("issues") or []:
            print(f" [{issue['severity']}] {issue['message']}")
        return

    # Full analysis result.
    decision = data["decision"]
    print(f"决策: {decision['label']} ({decision['status']})")
    print(f"原因: {decision['reason']}")
    for heading, key in (("阻塞项:", "blockers"), ("建议:", "recommendations")):
        if decision.get(key):
            print(heading)
            for entry in decision[key]:
                print(f" - {entry}")

    if "statistics" in data:
        print("\n统计结果:")
        for metric, stats in data["statistics"].items():
            imp = stats.get("mean_imp")
            p = stats.get("p_value")
            n = stats.get("n_valid")
            print(f" {metric}: 改进率={imp}%, p值={p}, 样本量={n}")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def main():
    """CLI entry point: build the argument parser and dispatch the subcommand.

    Prints help and exits with status 1 when no subcommand is given. Any
    exception raised by a subcommand is reported on stderr and also exits
    with status 1.
    """
    parser = argparse.ArgumentParser(prog="algobench", description="AlgoBench - 算法基准测试分析工具")
    subcommands = parser.add_subparsers(dest="command", help="可用命令")

    def new_command(name, summary, with_algos=False):
        # Every subcommand takes the CSV path; some also take the algorithm pair.
        cmd = subcommands.add_parser(name, help=summary)
        cmd.add_argument("input", help="CSV 文件路径")
        if with_algos:
            cmd.add_argument("-b", "--baseline", default="Baseline", help="基线算法名")
            cmd.add_argument("-c", "--compare", default="New", help="对比算法名")
        return cmd

    def add_format(cmd, default):
        # The output-format option is shared; only its default differs.
        cmd.add_argument("-f", "--format", default=default, choices=["json", "table"], help="输出格式")

    # analyze
    analyze_cmd = new_command("analyze", "完整分析流程", with_algos=True)
    analyze_cmd.add_argument("-m", "--metrics", default=None, help="指标列表(逗号分隔)")
    analyze_cmd.add_argument("--criteria", default="standard", choices=["exploratory", "standard", "strict"], help="分析标准")
    add_format(analyze_cmd, "table")

    # stats
    stats_cmd = new_command("stats", "单指标统计计算", with_algos=True)
    stats_cmd.add_argument("-m", "--metric", required=True, help="指标名")
    add_format(stats_cmd, "json")

    # quality / diagnose / validate
    add_format(new_command("quality", "数据质量检查"), "table")
    add_format(new_command("diagnose", "数据诊断"), "json")
    add_format(new_command("validate", "CSV 格式验证"), "json")

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        sys.exit(1)

    handlers = {
        "analyze": cmd_analyze,
        "stats": cmd_stats,
        "quality": cmd_quality,
        "diagnose": cmd_diagnose,
        "validate": cmd_validate,
    }

    try:
        handlers[args.command](args)
    except FileNotFoundError:
        print(f"错误: 文件不存在 - {args.input}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report the error and fail the process.
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
if __name__ == "__main__":
|
|
181
|
+
main()
|
|
File without changes
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""分析标准预设,对应 JS 的 src/config/analysisCriteria.js。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
# Preset ID used as the default and as the fallback for unknown IDs
# (see get_criteria_preset).
DEFAULT_CRITERIA_ID = "standard"

# Analysis-criteria presets keyed by ID. Each preset carries a display label,
# a description and a "thresholds" dict. Going from "exploratory" to "strict",
# the significance cut-offs shrink and the sample-size and effect-size
# cut-offs grow.
# NOTE(review): the units of minMeaningfulImprovement / degradation* are not
# visible in this module -- confirm against the decision engine.
ANALYSIS_CRITERIA_PRESETS = {
    # Label: "exploratory mode"; description: "surface more potential improvements".
    "exploratory": {
        "id": "exploratory",
        "label": "探索模式",
        "description": "识别更多潜在的改进机会",
        "thresholds": {
            "minMeaningfulImprovement": 2,
            "degradationWarning": 0.25,
            "degradationCritical": 0.35,
            "effectSize": {"NEGLIGIBLE": 0.15, "SMALL": 0.35, "MEDIUM": 0.55},
            "reliabilityFactors": {
                "significance": {"high": 0.05, "medium": 0.1, "marginal": 0.15},
                "sample": {"large": 15, "medium": 8, "small": 5},
            },
        },
    },
    # Label: "standard mode"; description: "reliable conclusions to support decisions".
    "standard": {
        "id": "standard",
        "label": "标准模式",
        "description": "获取可靠结论以支持决策",
        "thresholds": {
            "minMeaningfulImprovement": 3,
            "degradationWarning": 0.2,
            "degradationCritical": 0.3,
            "effectSize": {"NEGLIGIBLE": 0.2, "SMALL": 0.5, "MEDIUM": 0.8},
            "reliabilityFactors": {
                "significance": {"high": 0.01, "medium": 0.05, "marginal": 0.1},
                "sample": {"large": 30, "medium": 15, "small": 8},
            },
        },
    },
    # Label: "strict mode"; description: "publishing results or making critical decisions".
    "strict": {
        "id": "strict",
        "label": "严格模式",
        "description": "发布结果或制定关键决策",
        "thresholds": {
            "minMeaningfulImprovement": 4,
            "degradationWarning": 0.15,
            "degradationCritical": 0.25,
            "effectSize": {"NEGLIGIBLE": 0.25, "SMALL": 0.6, "MEDIUM": 0.9},
            "reliabilityFactors": {
                "significance": {"high": 0.005, "medium": 0.01, "marginal": 0.05},
                "sample": {"large": 40, "medium": 25, "small": 15},
            },
        },
    },
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_criteria_preset(criteria_id: str) -> dict:
    """Return the preset for ``criteria_id``, or the default preset if the ID is unknown."""
    fallback = ANALYSIS_CRITERIA_PRESETS[DEFAULT_CRITERIA_ID]
    return ANALYSIS_CRITERIA_PRESETS.get(criteria_id, fallback)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_effective_thresholds(
    criteria_id: str = DEFAULT_CRITERIA_ID,
    custom_thresholds: dict | None = None,
) -> dict:
    """Return the thresholds in effect: custom values layered over the preset.

    Top-level keys in ``custom_thresholds`` replace the preset's values.
    ``effectSize``, ``reliabilityFactors.sample`` and
    ``reliabilityFactors.significance`` are merged key by key, so a partial
    override keeps the remaining preset values.

    The result is always a fresh structure: the preset is deep-copied first,
    so callers may mutate the returned dict without altering the module-level
    presets.

    Args:
        criteria_id: ID of the preset to start from; unknown IDs fall back to
            the default preset.
        custom_thresholds: Optional overrides. ``None`` or an empty dict
            yields a copy of the preset's thresholds.
    """
    import copy

    # Copy before anything else: the preset dicts are shared module state.
    base = copy.deepcopy(get_criteria_preset(criteria_id)["thresholds"])
    if not custom_thresholds:
        return base

    # Flat keys are overridden directly; the two nested groups are merged below.
    merged = {**base, **{k: v for k, v in custom_thresholds.items() if k not in ("effectSize", "reliabilityFactors")}}
    merged["effectSize"] = {**base.get("effectSize", {}), **(custom_thresholds.get("effectSize") or {})}
    base_rf = base.get("reliabilityFactors", {})
    custom_rf = custom_thresholds.get("reliabilityFactors") or {}
    merged["reliabilityFactors"] = {
        **base_rf,
        **{k: v for k, v in custom_rf.items() if k not in ("sample", "significance")},
        "sample": {**base_rf.get("sample", {}), **(custom_rf.get("sample") or {})},
        "significance": {**base_rf.get("significance", {}), **(custom_rf.get("significance") or {})},
    }
    return merged
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""业务规则配置,对应 JS 的 src/config/business.js。"""
|
|
2
|
+
|
|
3
|
+
# A header containing this separator is treated as a metric column and is
# split into "<algorithm>/<metric>" (see parse_metric_column).
METRIC_SEPARATOR = "/"
# Headers starting with this prefix are parameter columns.
PARAM_PREFIX = "p_"
# Headers starting with this prefix are meta columns.
META_PREFIX = "#"

# Candidate names for the case column (matched case-insensitively)
CASE_COLUMN_NAMES = [
    "case", "benchmark", "test", "circuit", "design",
    "instance", "sample", "experiment", "dataset", "workload",
]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_case_column(header: str) -> bool:
    """Return True if ``header``, trimmed and lower-cased, is a known case-column name."""
    normalized = header.strip().lower()
    return normalized in CASE_COLUMN_NAMES
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_meta_column(header: str) -> bool:
    """Return True if ``header`` begins with the meta-column prefix."""
    has_prefix = header.startswith(META_PREFIX)
    return has_prefix
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def is_param_column(header: str) -> bool:
    """Return True if ``header`` begins with the parameter-column prefix."""
    has_prefix = header.startswith(PARAM_PREFIX)
    return has_prefix
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def is_metric_column(header: str) -> bool:
    """Return True if ``header`` contains the metric separator anywhere."""
    contains_separator = METRIC_SEPARATOR in header
    return contains_separator
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def parse_metric_column(header: str) -> "tuple[str, str] | None":
    """Split a metric column header into ``(algorithm, metric)``.

    The header is split at the first separator only, so the metric part may
    itself contain the separator. Both parts are stripped of surrounding
    whitespace.

    Returns:
        ``(algorithm, metric)``, or ``None`` when the header has no separator
        or either part is empty after stripping.
    """
    # The return annotation is quoted on purpose: this module does not enable
    # postponed annotation evaluation, and an unquoted ``X | None`` would be
    # evaluated at definition time and fail on Python < 3.10.
    if not is_metric_column(header):
        return None
    parts = header.split(METRIC_SEPARATOR, 1)
    if len(parts) == 2 and parts[0].strip() and parts[1].strip():
        return parts[0].strip(), parts[1].strip()
    return None
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""指标关键词库,对应 JS 的 src/config/metricKeywords.js。
|
|
2
|
+
|
|
3
|
+
用于根据指标名称推断其优化方向(higher/lower/target)。
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
|
|
10
|
+
# Default keyword library (simplified; keeps the frequently used entries).
# Maps a keyword to the direction in which a metric containing it is better
# ("higher" / "lower") plus a display description.
# Insertion order matters: match_keyword iterates this dict in order.
DEFAULT_KEYWORD_LIBRARY: dict[str, dict] = {
    # Machine-learning metrics
    "accuracy": {"better": "higher", "description": "准确率"},
    "precision": {"better": "higher", "description": "精确率"},
    "recall": {"better": "higher", "description": "召回率"},
    "f1": {"better": "higher", "description": "F1分数"},
    "auc": {"better": "higher", "description": "AUC值"},
    "map": {"better": "higher", "description": "平均精度均值"},
    "iou": {"better": "higher", "description": "交并比"},
    "dice": {"better": "higher", "description": "Dice系数"},
    "bleu": {"better": "higher", "description": "BLEU分数"},
    "rouge": {"better": "higher", "description": "ROUGE分数"},
    "perplexity": {"better": "lower", "description": "困惑度"},
    "loss": {"better": "lower", "description": "损失"},
    "error": {"better": "lower", "description": "错误率"},
    "ce": {"better": "lower", "description": "交叉熵"},

    # Scores / rankings
    "score": {"better": "higher", "description": "得分"},
    "rating": {"better": "higher", "description": "评分"},
    "rank": {"better": "lower", "description": "排名"},

    # Performance metrics
    "throughput": {"better": "higher", "description": "吞吐量"},
    "fps": {"better": "higher", "description": "帧率"},
    "qps": {"better": "higher", "description": "每秒查询数"},
    "tps": {"better": "higher", "description": "每秒事务数"},
    "speedup": {"better": "higher", "description": "加速比"},
    "speed": {"better": "higher", "description": "速度"},
    "rate": {"better": "higher", "description": "速率"},
    "bandwidth": {"better": "higher", "description": "带宽"},
    "iops": {"better": "higher", "description": "每秒IO操作"},

    # Quality / efficiency
    "efficiency": {"better": "higher", "description": "效率"},
    "quality": {"better": "higher", "description": "质量"},
    "performance": {"better": "higher", "description": "性能"},
    "improvement": {"better": "higher", "description": "改进"},
    "gain": {"better": "higher", "description": "增益"},
    "coverage": {"better": "higher", "description": "覆盖率"},

    # EDA / chip design
    "hpwl": {"better": "lower", "description": "半周长线长"},
    "wirelength": {"better": "lower", "description": "线长"},
    "wl": {"better": "lower", "description": "线长"},
    "area": {"better": "lower", "description": "面积"},
    "power": {"better": "lower", "description": "功耗"},
    "energy": {"better": "lower", "description": "能耗"},
    "latency": {"better": "lower", "description": "延迟"},
    "runtime": {"better": "lower", "description": "运行时间"},
    "runtime_s": {"better": "lower", "description": "运行时间(秒)"},
    "runtime_ms": {"better": "lower", "description": "运行时间(毫秒)"},
    "time": {"better": "lower", "description": "时间"},
    "memory": {"better": "lower", "description": "内存"},
    "delay": {"better": "lower", "description": "延迟"},
    "wns": {"better": "higher", "description": "最差负裕量"},
    "tns": {"better": "higher", "description": "总负裕量"},
    "slack": {"better": "higher", "description": "裕量"},
    "density": {"better": "lower", "description": "密度"},
    "overflow": {"better": "lower", "description": "溢出"},
    "congestion": {"better": "lower", "description": "拥塞"},
    "violations": {"better": "lower", "description": "违规数"},
    "drc": {"better": "lower", "description": "DRC违规"},
    "iterations": {"better": "lower", "description": "迭代次数"},
}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _build_pattern(keyword: str) -> re.Pattern:
    """Compile a case-insensitive, single-group regex for ``keyword``.

    The keyword is escaped literally, except that a ``?`` in it becomes
    ``.?`` (at most one arbitrary character).
    """
    body = re.escape(keyword).replace(r"\?", ".?")
    return re.compile("(" + body + ")", flags=re.IGNORECASE)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def match_keyword(metric_name: str, library: dict | None = None) -> dict | None:
    """Look up ``metric_name`` in the keyword library.

    Matching is case-insensitive and happens in two tiers, each scanned in
    library order:

    1. a keyword that appears as a whole token, i.e. not glued to another
       ASCII letter or digit on either side (``test_ce``, ``f1-score``);
    2. otherwise, the first keyword that appears anywhere as a substring.

    The whole-token tier stops a short keyword from hijacking a longer word
    that merely contains it: "performance" contains "ce", yet it resolves to
    the "performance" entry.

    Args:
        metric_name: Metric name to classify.
        library: Keyword library to search; the default library is used when
            this is ``None`` or empty.

    Returns:
        The matched entry with an added ``"keyword"`` key, or ``None`` when
        the name is empty, not a string, or matches nothing.
    """
    if not metric_name or not isinstance(metric_name, str):
        return None
    lib = library or DEFAULT_KEYWORD_LIBRARY
    name_lower = metric_name.lower()

    substring_hit = None
    for keyword, config in lib.items():
        needle = keyword.lower()
        if needle not in name_lower:
            continue
        # Whole-token hit: accept immediately.
        if re.search(rf"(?<![a-z0-9]){re.escape(needle)}(?![a-z0-9])", name_lower):
            return {"keyword": keyword, **config}
        # Remember only the first substring hit as the fallback.
        if substring_hit is None:
            substring_hit = {"keyword": keyword, **config}
    return substring_hit
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def infer_better_direction(metric_name: str) -> dict:
    """Infer the optimisation direction of a metric from its name.

    Returns a dict with "better", "target", "description" and
    "is_keyword_matched"; on a keyword match it also carries
    "matched_keyword". Empty or non-string names never match.
    """
    usable = bool(metric_name) and isinstance(metric_name, str)
    hit = match_keyword(metric_name) if usable else None

    if not hit:
        return {"better": None, "target": None, "description": "", "is_keyword_matched": False}

    return {
        "better": hit["better"],
        "target": hit.get("target"),
        "description": hit.get("description", ""),
        "is_keyword_matched": True,
        "matched_keyword": hit["keyword"],
    }
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""指标状态定义,对应 JS 的 src/config/metricStatus.js。"""
|
|
2
|
+
|
|
3
|
+
# Symbolic names for the per-metric status strings.
METRIC_STATUS = {
    "SIGNIFICANT_IMPROVEMENT": "significant_improvement",
    "IMPROVEMENT_TREND": "improvement_trend",
    "NEUTRAL": "neutral",
    "RISKY": "risky",
    "DEGRADATION": "degradation",
    "SIGNIFICANT_DEGRADATION": "significant_degradation",
    "INSUFFICIENT": "insufficient",
}

# Display label, priority and classification flags for each status string.
# NOTE(review): how "priority" is consumed is not visible here -- confirm
# against the decision engine.
METRIC_STATUS_CONFIG = {
    "significant_improvement": {"label": "显著改进", "priority": 5, "is_positive": True, "is_negative": False, "is_blocking": False},
    "improvement_trend": {"label": "改进待确认", "priority": 4, "is_positive": True, "is_negative": False, "is_blocking": False},
    "neutral": {"label": "基本持平", "priority": 2, "is_positive": False, "is_negative": False, "is_blocking": False},
    "risky": {"label": "存在风险", "priority": 6, "is_positive": False, "is_negative": True, "is_blocking": True},
    "degradation": {"label": "退化待确认", "priority": 7, "is_positive": False, "is_negative": True, "is_blocking": False},
    "significant_degradation": {"label": "显著退化", "priority": 8, "is_positive": False, "is_negative": True, "is_blocking": True},
    "insufficient": {"label": "样本量不足", "priority": 1, "is_positive": False, "is_negative": False, "is_blocking": False},
}

# Named groups of statuses, queried through is_status_in_group.
# "RISK" and "NEGATIVE" list the same three statuses in a different order.
STATUS_FILTER_GROUPS = {
    "RISK": ["risky", "degradation", "significant_degradation"],
    "POSITIVE": ["significant_improvement", "improvement_trend"],
    "NEGATIVE": ["significant_degradation", "degradation", "risky"],
    "BLOCKING": ["significant_degradation", "risky"],
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_metric_status_config(status: str) -> dict:
    """Return the config for ``status``, or the "neutral" config if it is unknown."""
    neutral = METRIC_STATUS_CONFIG["neutral"]
    return METRIC_STATUS_CONFIG.get(status, neutral)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def is_status_in_group(status: str, group_name: str) -> bool:
    """Return True if ``status`` belongs to the named filter group.

    An unknown ``group_name`` is treated as an empty group.
    """
    return status in STATUS_FILTER_GROUPS.get(group_name, [])
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Symbolic names for the overall decision status strings.
DECISION_STATUS = {
    "YES": "yes",
    "WATCH": "watch",
    "NO": "no",
    "INSUFFICIENT": "insufficient",
}

# Display label and short title for each decision status.
# Short titles, in order: "recommend rollout", "observation period",
# "do not recommend rollout", "insufficient data".
DECISION_CONFIG = {
    "yes": {"label": "YES", "short_title": "建议上线"},
    "watch": {"label": "WATCH", "short_title": "观察期"},
    "no": {"label": "NO", "short_title": "不建议上线"},
    "insufficient": {"label": "INSUFFICIENT", "short_title": "数据不足"},
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_decision_config(status: str) -> dict:
    """Return the config for a decision ``status``, or the "insufficient" config if unknown."""
    fallback = DECISION_CONFIG["insufficient"]
    return DECISION_CONFIG.get(status, fallback)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""指标配置服务,对应 JS 的 src/config/metrics.js + src/services/metricService.js。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from algobench.config.metric_keywords import infer_better_direction
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_metric_config(metric_name: str, user_config: dict | None = None) -> dict:
    """Build the configuration for a metric.

    Precedence, highest first: user configuration, keyword inference,
    built-in defaults. Inferred values that are ``None``, ``""`` or ``False``
    and user values that are ``None`` do not override anything.
    """
    config = {
        "unit": "",
        "better": "lower",
        "target": None,
        "description": "",
        "include_in_decision": True,
    }

    # Layer 2: whatever the keyword library could infer from the name.
    for key, value in infer_better_direction(metric_name).items():
        if value is None or value == "" or value is False:
            continue
        config[key] = value

    # Layer 3: explicit user settings for this metric.
    if user_config and metric_name in user_config:
        for key, value in user_config[metric_name].items():
            if value is not None:
                config[key] = value

    return config
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def is_higher_better(metric_name: str, user_config: dict | None = None) -> bool:
    """Return True if the metric's resolved direction is "higher"."""
    direction = get_metric_config(metric_name, user_config).get("better")
    if not direction:
        # A missing or empty direction defaults to "lower".
        direction = "lower"
    return direction == "higher"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_better_direction(metric_name: str, user_config: dict | None = None) -> str:
    """Return the metric's resolved direction, defaulting to "lower" when unset."""
    direction = get_metric_config(metric_name, user_config).get("better")
    return direction if direction else "lower"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def resolve_metric_direction(metric_name: str, user_config: dict | None = None) -> dict:
    """Resolve a metric's direction and return the full picture.

    The result holds the trend string, boolean flags for "higher" and
    "target" trends, the target value (only when it is non-blank and
    numeric), a validity flag and an error message. A target-based metric
    without a usable target value is reported as invalid.
    """
    config = get_metric_config(metric_name, user_config)
    trend = get_better_direction(metric_name, user_config)
    target_value = config.get("target")

    # A target is usable only if it is present, non-blank and numeric.
    if target_value is None or str(target_value).strip() == "":
        has_valid_target = False
    else:
        has_valid_target = _is_numeric(target_value)

    target_based = trend == "target"
    missing_target = target_based and not has_valid_target

    return {
        "trend": trend,
        "is_higher_better": trend == "higher",
        "is_target_based": target_based,
        "target_value": target_value if has_valid_target else None,
        "valid": not missing_target,
        "error": "目标型指标缺少有效目标值" if missing_target else None,
    }
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _is_numeric(value) -> bool:
    """Return True if ``float(value)`` succeeds, False on ValueError or TypeError."""
    try:
        float(value)
    except (ValueError, TypeError):
        return False
    else:
        return True
|