algobench-sdk 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- algobench_sdk-1.0.0/PKG-INFO +114 -0
- algobench_sdk-1.0.0/README.md +88 -0
- algobench_sdk-1.0.0/algobench/__init__.py +25 -0
- algobench_sdk-1.0.0/algobench/api.py +123 -0
- algobench_sdk-1.0.0/algobench/cli.py +181 -0
- algobench_sdk-1.0.0/algobench/config/__init__.py +0 -0
- algobench_sdk-1.0.0/algobench/config/analysis_criteria.py +79 -0
- algobench_sdk-1.0.0/algobench/config/business.py +37 -0
- algobench_sdk-1.0.0/algobench/config/metric_keywords.py +108 -0
- algobench_sdk-1.0.0/algobench/config/metric_status.py +56 -0
- algobench_sdk-1.0.0/algobench/config/metrics.py +58 -0
- algobench_sdk-1.0.0/algobench/config/thresholds.py +34 -0
- algobench_sdk-1.0.0/algobench/decision/__init__.py +0 -0
- algobench_sdk-1.0.0/algobench/decision/engine.py +239 -0
- algobench_sdk-1.0.0/algobench/decision/evaluator.py +230 -0
- algobench_sdk-1.0.0/algobench/exceptions.py +23 -0
- algobench_sdk-1.0.0/algobench/models.py +198 -0
- algobench_sdk-1.0.0/algobench/parsers/__init__.py +0 -0
- algobench_sdk-1.0.0/algobench/parsers/csv_parser.py +119 -0
- algobench_sdk-1.0.0/algobench/stats/__init__.py +0 -0
- algobench_sdk-1.0.0/algobench/stats/core.py +199 -0
- algobench_sdk-1.0.0/algobench/stats/diagnosis.py +147 -0
- algobench_sdk-1.0.0/algobench/stats/qvalue.py +30 -0
- algobench_sdk-1.0.0/algobench/stats/sample_processing.py +93 -0
- algobench_sdk-1.0.0/algobench/stats/tests/__init__.py +0 -0
- algobench_sdk-1.0.0/algobench/stats/tests/data_type.py +88 -0
- algobench_sdk-1.0.0/algobench/stats/tests/effect_size.py +91 -0
- algobench_sdk-1.0.0/algobench/stats/tests/nonparametric.py +73 -0
- algobench_sdk-1.0.0/algobench/stats/tests/smart.py +235 -0
- algobench_sdk-1.0.0/algobench/stats/tests/t_tests.py +110 -0
- algobench_sdk-1.0.0/algobench/utils/__init__.py +0 -0
- algobench_sdk-1.0.0/algobench/utils/decision_logic.py +32 -0
- algobench_sdk-1.0.0/algobench/utils/improvement.py +70 -0
- algobench_sdk-1.0.0/algobench/utils/math_utils.py +97 -0
- algobench_sdk-1.0.0/algobench/utils/numbers.py +40 -0
- algobench_sdk-1.0.0/algobench/utils/quality_level.py +20 -0
- algobench_sdk-1.0.0/algobench_sdk.egg-info/PKG-INFO +114 -0
- algobench_sdk-1.0.0/algobench_sdk.egg-info/SOURCES.txt +42 -0
- algobench_sdk-1.0.0/algobench_sdk.egg-info/dependency_links.txt +1 -0
- algobench_sdk-1.0.0/algobench_sdk.egg-info/entry_points.txt +2 -0
- algobench_sdk-1.0.0/algobench_sdk.egg-info/requires.txt +3 -0
- algobench_sdk-1.0.0/algobench_sdk.egg-info/top_level.txt +1 -0
- algobench_sdk-1.0.0/pyproject.toml +48 -0
- algobench_sdk-1.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: algobench-sdk
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: AlgoBench - 算法基准测试分析工具的 Python SDK 与 CLI
|
|
5
|
+
Author: DuanxiangLiu
|
|
6
|
+
License-Expression: AGPL-3.0-only
|
|
7
|
+
Project-URL: Homepage, https://github.com/DuanxiangLiu/AlgoBench
|
|
8
|
+
Project-URL: Documentation, https://duanxiangliu.github.io/AlgoBench-pages/
|
|
9
|
+
Project-URL: Repository, https://github.com/DuanxiangLiu/AlgoBench
|
|
10
|
+
Keywords: benchmark,statistics,analysis,decision,eda
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: scipy>=1.10
|
|
25
|
+
Requires-Dist: pandas>=2.0
|
|
26
|
+
|
|
27
|
+
# AlgoBench Python SDK
|
|
28
|
+
|
|
29
|
+
AlgoBench 是一个算法基准测试分析工具,提供完整的统计分析和决策评估流水线。
|
|
30
|
+
|
|
31
|
+
## 安装
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install algobench-sdk
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## 快速开始
|
|
38
|
+
|
|
39
|
+
### Python 库
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from algobench import analyze
|
|
43
|
+
|
|
44
|
+
result = analyze("data.csv", baseline="V1", compare="V2", metrics=["HPWL", "runtime"])
|
|
45
|
+
|
|
46
|
+
print(result.decision.label) # "建议上线"
|
|
47
|
+
print(result.decision.status) # "yes"
|
|
48
|
+
print(result.statistics["HPWL"].mean_imp) # 5.07
|
|
49
|
+
print(result.statistics["HPWL"].p_value) # 6.13e-13
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### 命令行
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# 完整分析
|
|
56
|
+
algobench analyze data.csv -b V1 -c V2 -m HPWL,runtime
|
|
57
|
+
|
|
58
|
+
# 单指标统计
|
|
59
|
+
algobench stats data.csv -b V1 -c V2 -m HPWL
|
|
60
|
+
|
|
61
|
+
# 数据质量检查
|
|
62
|
+
algobench quality data.csv
|
|
63
|
+
|
|
64
|
+
# 数据诊断
|
|
65
|
+
algobench diagnose data.csv
|
|
66
|
+
|
|
67
|
+
# CSV 格式验证
|
|
68
|
+
algobench validate data.csv
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## CSV 数据格式
|
|
72
|
+
|
|
73
|
+
```csv
|
|
74
|
+
Case,Baseline/HPWL,New/HPWL,Baseline/runtime,New/runtime
|
|
75
|
+
case1,1200,1100,50,45
|
|
76
|
+
case2,800,750,30,28
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
- 用例列:`Case`、`Benchmark`、`Test`、`Instance` 等
|
|
80
|
+
- 指标列格式:`算法名/指标名`(使用 `/` 分隔符)
|
|
81
|
+
- 元数据列:`#` 前缀(如 `#Size`)
|
|
82
|
+
- 参数列:`p_` 前缀(如 `p_mode`)
|
|
83
|
+
|
|
84
|
+
## 分析标准
|
|
85
|
+
|
|
86
|
+
| 模式 | 说明 |
|
|
87
|
+
|------|------|
|
|
88
|
+
| `exploratory` | 探索模式,识别更多潜在改进 |
|
|
89
|
+
| `standard` | 标准模式,平衡灵敏度和可靠性 |
|
|
90
|
+
| `strict` | 严格模式,用于发布或关键决策 |
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
result = analyze("data.csv", baseline="V1", compare="V2", criteria_id="strict")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## 决策状态
|
|
97
|
+
|
|
98
|
+
| 状态 | 含义 |
|
|
99
|
+
|------|------|
|
|
100
|
+
| `yes` | 建议上线 |
|
|
101
|
+
| `watch` | 观察期,需要更多数据 |
|
|
102
|
+
| `no` | 不建议上线 |
|
|
103
|
+
| `insufficient` | 数据不足 |
|
|
104
|
+
|
|
105
|
+
## 依赖
|
|
106
|
+
|
|
107
|
+
- Python >= 3.10
|
|
108
|
+
- numpy >= 1.24
|
|
109
|
+
- scipy >= 1.10
|
|
110
|
+
- pandas >= 2.0
|
|
111
|
+
|
|
112
|
+
## 许可证
|
|
113
|
+
|
|
114
|
+
AGPL-3.0-only
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# AlgoBench Python SDK
|
|
2
|
+
|
|
3
|
+
AlgoBench 是一个算法基准测试分析工具,提供完整的统计分析和决策评估流水线。
|
|
4
|
+
|
|
5
|
+
## 安装
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install algobench-sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## 快速开始
|
|
12
|
+
|
|
13
|
+
### Python 库
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from algobench import analyze
|
|
17
|
+
|
|
18
|
+
result = analyze("data.csv", baseline="V1", compare="V2", metrics=["HPWL", "runtime"])
|
|
19
|
+
|
|
20
|
+
print(result.decision.label) # "建议上线"
|
|
21
|
+
print(result.decision.status) # "yes"
|
|
22
|
+
print(result.statistics["HPWL"].mean_imp) # 5.07
|
|
23
|
+
print(result.statistics["HPWL"].p_value) # 6.13e-13
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### 命令行
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# 完整分析
|
|
30
|
+
algobench analyze data.csv -b V1 -c V2 -m HPWL,runtime
|
|
31
|
+
|
|
32
|
+
# 单指标统计
|
|
33
|
+
algobench stats data.csv -b V1 -c V2 -m HPWL
|
|
34
|
+
|
|
35
|
+
# 数据质量检查
|
|
36
|
+
algobench quality data.csv
|
|
37
|
+
|
|
38
|
+
# 数据诊断
|
|
39
|
+
algobench diagnose data.csv
|
|
40
|
+
|
|
41
|
+
# CSV 格式验证
|
|
42
|
+
algobench validate data.csv
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## CSV 数据格式
|
|
46
|
+
|
|
47
|
+
```csv
|
|
48
|
+
Case,Baseline/HPWL,New/HPWL,Baseline/runtime,New/runtime
|
|
49
|
+
case1,1200,1100,50,45
|
|
50
|
+
case2,800,750,30,28
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
- 用例列:`Case`、`Benchmark`、`Test`、`Instance` 等
|
|
54
|
+
- 指标列格式:`算法名/指标名`(使用 `/` 分隔符)
|
|
55
|
+
- 元数据列:`#` 前缀(如 `#Size`)
|
|
56
|
+
- 参数列:`p_` 前缀(如 `p_mode`)
|
|
57
|
+
|
|
58
|
+
## 分析标准
|
|
59
|
+
|
|
60
|
+
| 模式 | 说明 |
|
|
61
|
+
|------|------|
|
|
62
|
+
| `exploratory` | 探索模式,识别更多潜在改进 |
|
|
63
|
+
| `standard` | 标准模式,平衡灵敏度和可靠性 |
|
|
64
|
+
| `strict` | 严格模式,用于发布或关键决策 |
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
result = analyze("data.csv", baseline="V1", compare="V2", criteria_id="strict")
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## 决策状态
|
|
71
|
+
|
|
72
|
+
| 状态 | 含义 |
|
|
73
|
+
|------|------|
|
|
74
|
+
| `yes` | 建议上线 |
|
|
75
|
+
| `watch` | 观察期,需要更多数据 |
|
|
76
|
+
| `no` | 不建议上线 |
|
|
77
|
+
| `insufficient` | 数据不足 |
|
|
78
|
+
|
|
79
|
+
## 依赖
|
|
80
|
+
|
|
81
|
+
- Python >= 3.10
|
|
82
|
+
- numpy >= 1.24
|
|
83
|
+
- scipy >= 1.10
|
|
84
|
+
- pandas >= 2.0
|
|
85
|
+
|
|
86
|
+
## 许可证
|
|
87
|
+
|
|
88
|
+
AGPL-3.0-only
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""AlgoBench - 算法基准测试分析工具的 Python SDK 与 CLI。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__version__ = "1.0.0"
|
|
6
|
+
|
|
7
|
+
from algobench.api import analyze, compute_statistics, make_decision
|
|
8
|
+
from algobench.models import (
|
|
9
|
+
AnalysisResult,
|
|
10
|
+
Decision,
|
|
11
|
+
EvaluationSummary,
|
|
12
|
+
MetricEvaluation,
|
|
13
|
+
StatisticsResult,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"analyze",
|
|
18
|
+
"compute_statistics",
|
|
19
|
+
"make_decision",
|
|
20
|
+
"AnalysisResult",
|
|
21
|
+
"Decision",
|
|
22
|
+
"EvaluationSummary",
|
|
23
|
+
"MetricEvaluation",
|
|
24
|
+
"StatisticsResult",
|
|
25
|
+
]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""AlgoBench 顶层 API,对应前端的一站式分析流程。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from algobench.models import AnalysisResult, StatisticsResult, EvaluationSummary, Decision, MetricEvaluation
|
|
6
|
+
from algobench.parsers.csv_parser import parse_csv, parse_csv_file
|
|
7
|
+
from algobench.config.metrics import get_metric_config, get_better_direction
|
|
8
|
+
from algobench.stats.core import compute_statistics
|
|
9
|
+
from algobench.stats.diagnosis import check_data_quality
|
|
10
|
+
from algobench.decision.engine import build_evaluation_summary
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def analyze(
    input_source: str,
    baseline: str = "Baseline",
    compare: str = "New",
    metrics: list[str] | None = None,
    selected_cases: list[str] | None = None,
    criteria_id: str = "standard",
    custom_thresholds: dict | None = None,
    metric_configs: dict | None = None,
) -> AnalysisResult:
    """Run the one-stop pipeline: parse -> statistics -> decision.

    Args:
        input_source: Path to a CSV file, or the CSV text itself. The value
            is treated as a path only when it is a single line ending in
            ``.csv`` (case-insensitive); anything else is parsed as CSV text.
        baseline: Name of the baseline algorithm.
        compare: Name of the algorithm compared against the baseline.
        metrics: Metrics to analyze; ``None`` or empty means every metric
            found in the data. Names absent from the data are ignored.
        selected_cases: Cases to analyze; ``None`` means all cases.
        criteria_id: ID of the analysis-criteria preset.
        custom_thresholds: Custom thresholds forwarded to the evaluation step.
        metric_configs: User metric configuration, used to resolve each
            metric's "better" direction and forwarded to the evaluation step.

    Returns:
        An ``AnalysisResult``. When the parsed data is empty or contains
        fewer than two algorithms, only ``parsed`` is populated.
    """
    # 1. Parse the data.
    # A multi-line string is always CSV content, even if its last field
    # happens to end in ".csv"; the extension check ignores letter case so
    # "DATA.CSV" is still read from disk.
    looks_like_path = (
        "\n" not in input_source
        and "\r" not in input_source
        and input_source.lower().endswith(".csv")
    )
    if looks_like_path:
        parsed = parse_csv_file(input_source, selected_cases)
    else:
        parsed = parse_csv(input_source, selected_cases)

    data = parsed["data"]
    algos = parsed["algos"]
    available_metrics = parsed["metrics"]

    # Nothing to compare: return the parse result alone.
    if not data or len(algos) < 2:
        return AnalysisResult(parsed=parsed)

    # Keep only requested metrics that actually exist in the data.
    target_metrics = [m for m in (metrics or available_metrics) if m in available_metrics]

    # 2. Data-quality check.
    quality = check_data_quality(data, algos, target_metrics)

    # 3. Per-metric statistics.
    statistics_results: dict[str, StatisticsResult] = {}
    for metric in target_metrics:
        direction = get_better_direction(metric, metric_configs)
        stats = compute_statistics(data, metric, baseline, compare, direction, selected_cases)
        statistics_results[metric] = stats

    # 4. Build the evaluation summary (which carries the decision).
    dataset_rows = parsed.get("filteredCases", 0) + len(data)
    evaluation = build_evaluation_summary(
        stats_results=statistics_results,
        metric_configs=metric_configs,
        criteria_id=criteria_id,
        custom_thresholds=custom_thresholds,
        n_selected_cases=len(data),
        dataset_rows=dataset_rows,
    )

    # 5. Extract the decision from the summary.
    decision = Decision(
        status=evaluation.decision_status,
        label=evaluation.decision_label,
        reason=evaluation.decision_reason,
        blockers=evaluation.blockers,
        recommendations=evaluation.recommendations,
    )

    return AnalysisResult(
        parsed=parsed,
        statistics=statistics_results,
        quality=quality,
        evaluation=evaluation,
        decision=decision,
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def compute_statistics_only(
    input_source: str,
    metric: str,
    baseline: str = "Baseline",
    compare: str = "New",
    selected_cases: list[str] | None = None,
    metric_configs: dict | None = None,
) -> StatisticsResult:
    """Compute the statistics for a single metric only.

    Args:
        input_source: Path to a CSV file, or the CSV text itself. The value
            is treated as a path only when it is a single line ending in
            ``.csv`` (case-insensitive); anything else is parsed as CSV text.
        metric: Name of the metric to evaluate.
        baseline: Name of the baseline algorithm.
        compare: Name of the algorithm compared against the baseline.
        selected_cases: Cases to include; ``None`` means all cases.
        metric_configs: User metric configuration used to resolve the
            metric's "better" direction.

    Returns:
        The ``StatisticsResult`` produced by ``compute_statistics``.
    """
    # Multi-line input is always CSV content; the extension check ignores
    # letter case so "DATA.CSV" is still read from disk.
    looks_like_path = (
        "\n" not in input_source
        and "\r" not in input_source
        and input_source.lower().endswith(".csv")
    )
    if looks_like_path:
        parsed = parse_csv_file(input_source, selected_cases)
    else:
        parsed = parse_csv(input_source, selected_cases)

    direction = get_better_direction(metric, metric_configs)
    return compute_statistics(parsed["data"], metric, baseline, compare, direction, selected_cases)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def make_decision(
    stats_results: dict[str, StatisticsResult],
    metric_configs: dict | None = None,
    criteria_id: str = "standard",
    custom_thresholds: dict | None = None,
) -> EvaluationSummary:
    """Produce a decision from statistics that were already computed.

    All arguments are forwarded unchanged, by keyword, to
    ``build_evaluation_summary``, whose result is returned as-is.
    """
    summary_kwargs = {
        "stats_results": stats_results,
        "metric_configs": metric_configs,
        "criteria_id": criteria_id,
        "custom_thresholds": custom_thresholds,
    }
    return build_evaluation_summary(**summary_kwargs)
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""AlgoBench CLI 入口。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from dataclasses import asdict
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _serialize(obj):
|
|
13
|
+
"""递归序列化 dataclass 为 dict。"""
|
|
14
|
+
if hasattr(obj, "__dataclass_fields__"):
|
|
15
|
+
return {k: _serialize(v) for k, v in asdict(obj).items()}
|
|
16
|
+
if isinstance(obj, list):
|
|
17
|
+
return [_serialize(item) for item in obj]
|
|
18
|
+
if isinstance(obj, dict):
|
|
19
|
+
return {k: _serialize(v) for k, v in obj.items()}
|
|
20
|
+
return obj
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def cmd_analyze(args):
    """Handle ``algobench analyze``: run the full pipeline and print it.

    Args:
        args: Parsed CLI namespace providing ``input``, ``baseline``,
            ``compare``, ``metrics`` (comma-separated string or ``None``),
            ``criteria`` and ``format``.
    """
    from algobench.api import analyze

    # Tolerate "HPWL, runtime": strip whitespace around each name and drop
    # empty entries so a stray space or trailing comma does not silently
    # exclude a metric. An empty selection falls back to "all metrics".
    metrics = None
    if args.metrics:
        metrics = [name.strip() for name in args.metrics.split(",") if name.strip()] or None

    result = analyze(
        input_source=args.input,
        baseline=args.baseline,
        compare=args.compare,
        metrics=metrics,
        criteria_id=args.criteria,
    )
    output = {
        "decision": _serialize(result.decision),
        # Guard against a missing statistics mapping on early-return results.
        "statistics": {k: _serialize(v) for k, v in (result.statistics or {}).items()},
        "quality": result.quality,
        "evaluation": _serialize(result.evaluation),
    }
    _print_output(output, args.format)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def cmd_stats(args):
    """Handle ``algobench stats``: compute and print one metric's statistics."""
    from algobench.api import compute_statistics_only

    call_kwargs = {
        "input_source": args.input,
        "metric": args.metric,
        "baseline": args.baseline,
        "compare": args.compare,
    }
    stats = compute_statistics_only(**call_kwargs)
    serialized = _serialize(stats)
    _print_output(serialized, args.format)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def cmd_quality(args):
    """Handle ``algobench quality``: parse the file and print its quality report."""
    from algobench.parsers.csv_parser import parse_csv_file
    from algobench.stats.diagnosis import check_data_quality

    parsed = parse_csv_file(args.input)
    rows, algos, metrics = parsed["data"], parsed["algos"], parsed["metrics"]
    report = check_data_quality(rows, algos, metrics)
    _print_output(report, args.format)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def cmd_diagnose(args):
    """Handle ``algobench diagnose``: parse the file and print diagnosed issues."""
    from algobench.parsers.csv_parser import parse_csv_file
    from algobench.stats.diagnosis import diagnose_data_issues

    parsed = parse_csv_file(args.input)
    rows, algos, metrics = parsed["data"], parsed["algos"], parsed["metrics"]
    found_issues = diagnose_data_issues(rows, algos, metrics)
    _print_output(found_issues, args.format)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def cmd_validate(args):
    """Handle ``algobench validate``: print a structural summary of the CSV.

    The file counts as valid when it has at least one data row and at least
    two algorithms.
    """
    from algobench.parsers.csv_parser import parse_csv_file

    parsed = parse_csv_file(args.input)
    rows = parsed["data"]
    algos = parsed["algos"]
    is_valid = bool(rows and len(algos) >= 2)

    summary = {
        "valid": is_valid,
        "total_cases": len(rows),
        "algos": algos,
        "metrics": parsed["metrics"],
        "meta_columns": parsed["metaColumns"],
        "param_columns": parsed["paramColumns"],
    }
    _print_output(summary, args.format)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _print_output(data, fmt: str = "json"):
|
|
84
|
+
if fmt == "json":
|
|
85
|
+
print(json.dumps(data, indent=2, ensure_ascii=False))
|
|
86
|
+
elif fmt == "table":
|
|
87
|
+
_print_table(data)
|
|
88
|
+
else:
|
|
89
|
+
print(json.dumps(data, indent=2, ensure_ascii=False))
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _print_table(data):
|
|
93
|
+
if isinstance(data, dict) and "decision" in data:
|
|
94
|
+
d = data["decision"]
|
|
95
|
+
print(f"决策: {d['label']} ({d['status']})")
|
|
96
|
+
print(f"原因: {d['reason']}")
|
|
97
|
+
if d.get("blockers"):
|
|
98
|
+
print("阻塞项:")
|
|
99
|
+
for b in d["blockers"]:
|
|
100
|
+
print(f" - {b}")
|
|
101
|
+
if d.get("recommendations"):
|
|
102
|
+
print("建议:")
|
|
103
|
+
for r in d["recommendations"]:
|
|
104
|
+
print(f" - {r}")
|
|
105
|
+
if "statistics" in data:
|
|
106
|
+
print("\n统计结果:")
|
|
107
|
+
for metric, stats in data["statistics"].items():
|
|
108
|
+
imp = stats.get("mean_imp")
|
|
109
|
+
p = stats.get("p_value")
|
|
110
|
+
n = stats.get("n_valid")
|
|
111
|
+
print(f" {metric}: 改进率={imp}%, p值={p}, 样本量={n}")
|
|
112
|
+
elif isinstance(data, dict) and "score" in data:
|
|
113
|
+
print(f"数据质量: {data['score']}分 ({data['level']})")
|
|
114
|
+
if data.get("issues"):
|
|
115
|
+
for issue in data["issues"]:
|
|
116
|
+
print(f" [{issue['severity']}] {issue['message']}")
|
|
117
|
+
else:
|
|
118
|
+
print(json.dumps(data, indent=2, ensure_ascii=False))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def main(argv: list[str] | None = None):
    """Command-line entry point for ``algobench``.

    Args:
        argv: Argument list without the program name. ``None`` (the default,
            used by the console-script entry point) makes argparse read
            ``sys.argv[1:]``; passing a list allows programmatic invocation.

    Exits with status 1 when no sub-command is given (after printing help)
    or when the selected command raises; argparse itself exits on invalid
    arguments.
    """
    parser = argparse.ArgumentParser(prog="algobench", description="AlgoBench - 算法基准测试分析工具")
    sub = parser.add_subparsers(dest="command", help="可用命令")

    # analyze
    p_analyze = sub.add_parser("analyze", help="完整分析流程")
    p_analyze.add_argument("input", help="CSV 文件路径")
    p_analyze.add_argument("-b", "--baseline", default="Baseline", help="基线算法名")
    p_analyze.add_argument("-c", "--compare", default="New", help="对比算法名")
    p_analyze.add_argument("-m", "--metrics", default=None, help="指标列表(逗号分隔)")
    p_analyze.add_argument("--criteria", default="standard", choices=["exploratory", "standard", "strict"], help="分析标准")
    p_analyze.add_argument("-f", "--format", default="table", choices=["json", "table"], help="输出格式")

    # stats
    p_stats = sub.add_parser("stats", help="单指标统计计算")
    p_stats.add_argument("input", help="CSV 文件路径")
    p_stats.add_argument("-b", "--baseline", default="Baseline", help="基线算法名")
    p_stats.add_argument("-c", "--compare", default="New", help="对比算法名")
    p_stats.add_argument("-m", "--metric", required=True, help="指标名")
    p_stats.add_argument("-f", "--format", default="json", choices=["json", "table"], help="输出格式")

    # quality
    p_quality = sub.add_parser("quality", help="数据质量检查")
    p_quality.add_argument("input", help="CSV 文件路径")
    p_quality.add_argument("-f", "--format", default="table", choices=["json", "table"], help="输出格式")

    # diagnose
    p_diagnose = sub.add_parser("diagnose", help="数据诊断")
    p_diagnose.add_argument("input", help="CSV 文件路径")
    p_diagnose.add_argument("-f", "--format", default="json", choices=["json", "table"], help="输出格式")

    # validate
    p_validate = sub.add_parser("validate", help="CSV 格式验证")
    p_validate.add_argument("input", help="CSV 文件路径")
    p_validate.add_argument("-f", "--format", default="json", choices=["json", "table"], help="输出格式")

    args = parser.parse_args(argv)
    if not args.command:
        # No sub-command: show usage and signal failure.
        parser.print_help()
        sys.exit(1)

    commands = {
        "analyze": cmd_analyze,
        "stats": cmd_stats,
        "quality": cmd_quality,
        "diagnose": cmd_diagnose,
        "validate": cmd_validate,
    }

    # Top-level boundary: report any failure on stderr and exit non-zero
    # instead of showing a traceback.
    try:
        commands[args.command](args)
    except FileNotFoundError:
        print(f"错误: 文件不存在 - {args.input}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""分析标准预设,对应 JS 的 src/config/analysisCriteria.js。"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
DEFAULT_CRITERIA_ID = "standard"


def _criteria_preset(preset_id, label, description, improvement, warning, critical, effect_size, significance, sample):
    """Assemble one preset entry in the layout shared by every preset.

    ``effect_size`` is (NEGLIGIBLE, SMALL, MEDIUM), ``significance`` is
    (high, medium, marginal) and ``sample`` is (large, medium, small).
    """
    negligible, small, medium = effect_size
    sig_high, sig_medium, sig_marginal = significance
    n_large, n_medium, n_small = sample
    return {
        "id": preset_id,
        "label": label,
        "description": description,
        "thresholds": {
            "minMeaningfulImprovement": improvement,
            "degradationWarning": warning,
            "degradationCritical": critical,
            "effectSize": {"NEGLIGIBLE": negligible, "SMALL": small, "MEDIUM": medium},
            "reliabilityFactors": {
                "significance": {"high": sig_high, "medium": sig_medium, "marginal": sig_marginal},
                "sample": {"large": n_large, "medium": n_medium, "small": n_small},
            },
        },
    }


# Presets keyed by their own id, in the order exploratory, standard, strict.
ANALYSIS_CRITERIA_PRESETS = {
    preset["id"]: preset
    for preset in (
        _criteria_preset(
            "exploratory", "探索模式", "识别更多潜在的改进机会",
            2, 0.25, 0.35,
            (0.15, 0.35, 0.55), (0.05, 0.1, 0.15), (15, 8, 5),
        ),
        _criteria_preset(
            "standard", "标准模式", "获取可靠结论以支持决策",
            3, 0.2, 0.3,
            (0.2, 0.5, 0.8), (0.01, 0.05, 0.1), (30, 15, 8),
        ),
        _criteria_preset(
            "strict", "严格模式", "发布结果或制定关键决策",
            4, 0.15, 0.25,
            (0.25, 0.6, 0.9), (0.005, 0.01, 0.05), (40, 25, 15),
        ),
    )
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_criteria_preset(criteria_id: str) -> dict:
    """Return the preset for ``criteria_id``, or the default preset if the ID is unknown."""
    try:
        return ANALYSIS_CRITERIA_PRESETS[criteria_id]
    except KeyError:
        return ANALYSIS_CRITERIA_PRESETS[DEFAULT_CRITERIA_ID]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_effective_thresholds(
    criteria_id: str = DEFAULT_CRITERIA_ID,
    custom_thresholds: dict | None = None,
) -> dict:
    """Return the thresholds in effect: custom values override the preset.

    Args:
        criteria_id: ID of the preset to start from; an unknown ID falls back
            to the default preset.
        custom_thresholds: Optional overrides. Top-level keys replace the
            preset's values; ``effectSize``, ``reliabilityFactors.sample``
            and ``reliabilityFactors.significance`` are merged key by key.

    Returns:
        A new dict. It never aliases the module-level preset data, so callers
        may modify it without changing the presets.
    """
    import copy

    # Work on a private copy: returning the preset's own dict would let a
    # caller's mutation leak into every later lookup.
    base = copy.deepcopy(get_criteria_preset(criteria_id)["thresholds"])
    if not custom_thresholds:
        return base

    # Nested sections are merged separately below, so leave them out here.
    merged = {**base, **{k: v for k, v in custom_thresholds.items() if k not in ("effectSize", "reliabilityFactors")}}
    merged["effectSize"] = {**base.get("effectSize", {}), **(custom_thresholds.get("effectSize") or {})}
    base_rf = base.get("reliabilityFactors", {})
    custom_rf = custom_thresholds.get("reliabilityFactors") or {}
    merged["reliabilityFactors"] = {
        **base_rf,
        **{k: v for k, v in custom_rf.items() if k not in ("sample", "significance")},
        "sample": {**base_rf.get("sample", {}), **(custom_rf.get("sample") or {})},
        "significance": {**base_rf.get("significance", {}), **(custom_rf.get("significance") or {})},
    }
    return merged
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""业务规则配置,对应 JS 的 src/config/business.js。"""
|
|
2
|
+
|
|
3
|
+
METRIC_SEPARATOR = "/"  # separates algorithm name from metric name in a header
PARAM_PREFIX = "p_"  # header prefix of parameter columns
META_PREFIX = "#"  # header prefix of metadata columns

# Candidate names for the case column (matched case-insensitively).
CASE_COLUMN_NAMES = [
    "case",
    "benchmark",
    "test",
    "circuit",
    "design",
    "instance",
    "sample",
    "experiment",
    "dataset",
    "workload",
]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_case_column(header: str) -> bool:
    """Return True if ``header``, trimmed and lower-cased, is a known case-column name."""
    normalized = header.strip().lower()
    return any(normalized == candidate for candidate in CASE_COLUMN_NAMES)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_meta_column(header: str) -> bool:
    """Return True if ``header`` begins with the metadata prefix."""
    has_meta_prefix = header.startswith(META_PREFIX)
    return has_meta_prefix
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def is_param_column(header: str) -> bool:
    """Return True if ``header`` begins with the parameter prefix."""
    has_param_prefix = header.startswith(PARAM_PREFIX)
    return has_param_prefix
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def is_metric_column(header: str) -> bool:
    """Return True if ``header`` contains the algorithm/metric separator."""
    contains_separator = METRIC_SEPARATOR in header
    return contains_separator
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def parse_metric_column(header: str) -> tuple[str, str] | None:
    """Split a metric header into ``(algorithm, metric)``.

    The header is cut at the first separator and both halves are trimmed.
    Returns ``None`` when the header has no separator or either half is
    empty after trimming.
    """
    if not is_metric_column(header):
        return None
    algo_part, _, metric_part = header.partition(METRIC_SEPARATOR)
    algo_name = algo_part.strip()
    metric_name = metric_part.strip()
    if algo_name and metric_name:
        return algo_name, metric_name
    return None
|