@chongyan/autospec 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.en.md +472 -0
- package/README.md +476 -0
- package/bin/autospec.js +3 -0
- package/knowledge/README.md +144 -0
- package/knowledge/checklists/code.md +182 -0
- package/knowledge/checklists/design.md +196 -0
- package/knowledge/checklists/release.md +70 -0
- package/knowledge/checklists/requirement.md +169 -0
- package/knowledge/checklists/test.md +46 -0
- package/knowledge/config/README.en.md +44 -0
- package/knowledge/config/README.md +44 -0
- package/knowledge/config/role-composition.yaml +98 -0
- package/knowledge/config/role-extensions.yaml +140 -0
- package/knowledge/config/skill-compositions.yaml +142 -0
- package/knowledge/config/team-stage.yaml +95 -0
- package/knowledge/config/team-tasks.yaml +139 -0
- package/knowledge/config/team-triggers.yaml +198 -0
- package/knowledge/config/validation-patterns.yaml +137 -0
- package/knowledge/domain/README.md +115 -0
- package/knowledge/domain/flows/README.md +194 -0
- package/knowledge/domain/glossary.md +143 -0
- package/knowledge/domain/rules.md +138 -0
- package/knowledge/environment/README.en.md +36 -0
- package/knowledge/environment/README.md +87 -0
- package/knowledge/environment/component-knowledge.md +316 -0
- package/knowledge/environment/detection-patterns.yaml +502 -0
- package/knowledge/environment/middleware-knowledge.md +237 -0
- package/knowledge/environment/template-registry.md +321 -0
- package/knowledge/guides/domain-driven-design.md +345 -0
- package/knowledge/guides/knowledge-management.md +369 -0
- package/knowledge/guides/requirement-engineering.md +329 -0
- package/knowledge/guides/stages/ai-effect-evaluator.md +93 -0
- package/knowledge/guides/stages/code-implementer.md +205 -0
- package/knowledge/guides/stages/code-reviewer.md +111 -0
- package/knowledge/guides/stages/consistency-checker.md +177 -0
- package/knowledge/guides/stages/design-planner.md +401 -0
- package/knowledge/guides/stages/design-reviewer.md +83 -0
- package/knowledge/guides/stages/integration-test-runner.md +105 -0
- package/knowledge/guides/stages/release-checker.md +205 -0
- package/knowledge/guides/stages/requirement-analyzer.md +195 -0
- package/knowledge/guides/stages/requirement-reviewer.md +83 -0
- package/knowledge/guides/stages/security-reviewer.md +89 -0
- package/knowledge/guides/stages/test-context-analyzer.md +250 -0
- package/knowledge/guides/stages/test-generator.md +241 -0
- package/knowledge/guides/stages/test-planner.md +183 -0
- package/knowledge/guides/stages/test-reviewer.md +76 -0
- package/knowledge/guides/stages/unit-test-runner.md +83 -0
- package/knowledge/guides/support/ai-agent-analyzer.md +362 -0
- package/knowledge/guides/support/ai-anomaly-analyzer.md +213 -0
- package/knowledge/guides/support/ai-artifact-evaluator.md +192 -0
- package/knowledge/guides/support/ai-capability-analyzer.md +193 -0
- package/knowledge/guides/support/ai-component-analyzer.md +169 -0
- package/knowledge/guides/support/ai-data-validator.md +276 -0
- package/knowledge/guides/support/ai-evaluation-planner.md +374 -0
- package/knowledge/guides/support/ai-path-evaluator.md +274 -0
- package/knowledge/guides/support/ai-pipeline-evaluator.md +219 -0
- package/knowledge/guides/support/ai-rag-analyzer.md +339 -0
- package/knowledge/guides/support/ai-task-assessor.md +418 -0
- package/knowledge/guides/support/ai-test-diagnostics.md +133 -0
- package/knowledge/guides/support/complexity-assessor.md +268 -0
- package/knowledge/guides/support/component-discovery.md +183 -0
- package/knowledge/guides/support/environment-scanner.md +207 -0
- package/knowledge/guides/support/environment-validator.md +207 -0
- package/knowledge/guides/support/knowledge-generator.md +234 -0
- package/knowledge/guides/support/methodology-extractor.md +55 -0
- package/knowledge/guides/support/pipeline-protocol.md +438 -0
- package/knowledge/guides/support/practice-logger.md +359 -0
- package/knowledge/guides/support/scope-inference.md +174 -0
- package/knowledge/guides/support/skill-distiller.md +91 -0
- package/knowledge/guides/support/skill-updater.md +45 -0
- package/knowledge/guides/support/skill-validator.md +72 -0
- package/knowledge/guides/support/team-orchestrator.md +323 -0
- package/knowledge/guides/support/tech-stack-analyzer.md +139 -0
- package/knowledge/guides/support/test-runner.md +254 -0
- package/knowledge/guides/system-design.md +352 -0
- package/knowledge/organization/ai-native-team.md +318 -0
- package/knowledge/organization/team-metrics.md +228 -0
- package/knowledge/principles/constitution.md +134 -0
- package/knowledge/principles/core-principles.md +368 -0
- package/knowledge/principles/design-philosophy.md +877 -0
- package/knowledge/principles/evolution.md +553 -0
- package/knowledge/process/01-requirement.md +113 -0
- package/knowledge/process/02-design.md +123 -0
- package/knowledge/process/03-implementation.md +90 -0
- package/knowledge/process/04-review.md +80 -0
- package/knowledge/process/05-testing.md +90 -0
- package/knowledge/process/06-delivery.md +88 -0
- package/knowledge/process/README.en.md +38 -0
- package/knowledge/process/README.md +48 -0
- package/knowledge/process/ai-sdlc.md +475 -0
- package/knowledge/process/overview.md +319 -0
- package/knowledge/standards/code-review.md +876 -0
- package/knowledge/standards/coding-style.md +940 -0
- package/knowledge/standards/data-consistency.md +1085 -0
- package/knowledge/standards/document-versioning.md +210 -0
- package/knowledge/standards/risk-detection.md +186 -0
- package/knowledge/templates/ai-evaluation.md +150 -0
- package/knowledge/templates/api-design.md +117 -0
- package/knowledge/templates/database-design.md +132 -0
- package/knowledge/templates/domain-driven-design.md +321 -0
- package/knowledge/templates/product-proposal.md +201 -0
- package/knowledge/templates/system-design.md +227 -0
- package/knowledge/templates/task-breakdown.md +107 -0
- package/knowledge/templates/test-case.md +170 -0
- package/package.json +53 -0
- package/plugins/.claude-plugin/plugin.json +134 -0
- package/plugins/agents/roles/ai-engineer.md +129 -0
- package/plugins/agents/roles/backend-engineer.md +165 -0
- package/plugins/agents/roles/ceo.md +94 -0
- package/plugins/agents/roles/data-engineer.md +135 -0
- package/plugins/agents/roles/devops-engineer.md +181 -0
- package/plugins/agents/roles/frontend-engineer.md +129 -0
- package/plugins/agents/roles/product-owner.md +98 -0
- package/plugins/agents/roles/quality-engineer.md +129 -0
- package/plugins/agents/roles/security-engineer.md +180 -0
- package/plugins/agents/roles/tech-lead.md +97 -0
- package/plugins/agents/support/blind-comparator.md +88 -0
- package/plugins/agents/support/consistency-checker.md +103 -0
- package/plugins/agents/support/failure-diagnostician.md +141 -0
- package/plugins/agents/support/independent-reviewer.md +80 -0
- package/plugins/agents/support/safety-auditor.md +121 -0
- package/plugins/agents/support/skill-benchmarker.md +86 -0
- package/plugins/agents/support/skill-forger.md +105 -0
- package/plugins/agents/support/stage-gate-evaluator.md +121 -0
- package/plugins/agents/support/test-coverage-reviewer.md +73 -0
- package/plugins/benchmarks/templates/README.md +44 -0
- package/plugins/benchmarks/templates/commands/explore-template.yaml +48 -0
- package/plugins/benchmarks/templates/pipeline/agile-template.yaml +84 -0
- package/plugins/benchmarks/templates/pipeline/waterfall-template.yaml +106 -0
- package/plugins/benchmarks/templates/skills/requirement-analyzer-template.yaml +48 -0
- package/plugins/commands/README.en.md +96 -0
- package/plugins/commands/README.md +96 -0
- package/plugins/commands/apply.md +191 -0
- package/plugins/commands/archive.md +76 -0
- package/plugins/commands/env-export.md +79 -0
- package/plugins/commands/env-sync.md +640 -0
- package/plugins/commands/env-template.md +223 -0
- package/plugins/commands/env-update.md +264 -0
- package/plugins/commands/env-validate.md +176 -0
- package/plugins/commands/env.md +79 -0
- package/plugins/commands/explore.md +76 -0
- package/plugins/commands/field-evolve.md +536 -0
- package/plugins/commands/memory.md +249 -0
- package/plugins/commands/project-evolve.md +821 -0
- package/plugins/commands/propose.md +93 -0
- package/plugins/commands/review.md +140 -0
- package/plugins/commands/run.md +224 -0
- package/plugins/commands/status.md +62 -0
- package/plugins/commands/validate.md +108 -0
- package/plugins/hooks/README.en.md +56 -0
- package/plugins/hooks/README.md +56 -0
- package/plugins/hooks/ai-project-guard.js +329 -0
- package/plugins/hooks/artifact-evaluation-hook.js +237 -0
- package/plugins/hooks/constitution-guard.js +211 -0
- package/plugins/hooks/environment-autocommit.js +264 -0
- package/plugins/hooks/environment-manager.js +778 -0
- package/plugins/hooks/execution-tracker.js +354 -0
- package/plugins/hooks/frozen-zone-guard.js +140 -0
- package/plugins/hooks/layer1-validator.js +423 -0
- package/plugins/hooks/lib/artifact-evaluator.js +414 -0
- package/plugins/hooks/lib/benchmarks/change-detector.js +390 -0
- package/plugins/hooks/lib/benchmarks/evaluator.js +605 -0
- package/plugins/hooks/lib/benchmarks/integration-example.js +169 -0
- package/plugins/hooks/lib/data-and-ai-detector.js +275 -0
- package/plugins/hooks/lib/detection-pattern-loader.js +865 -0
- package/plugins/hooks/lib/directory-discovery.js +395 -0
- package/plugins/hooks/lib/environment-config-loader.js +341 -0
- package/plugins/hooks/lib/environment-detector.js +553 -0
- package/plugins/hooks/lib/environment-evolver.js +564 -0
- package/plugins/hooks/lib/environment-registry.js +813 -0
- package/plugins/hooks/lib/execution-path.js +427 -0
- package/plugins/hooks/lib/hook-error-recorder.js +245 -0
- package/plugins/hooks/lib/hook-logger.js +538 -0
- package/plugins/hooks/lib/hook-runner.js +97 -0
- package/plugins/hooks/lib/hook-runner.sh +44 -0
- package/plugins/hooks/lib/hook-state-manager.js +480 -0
- package/plugins/hooks/lib/memory-extractor.js +377 -0
- package/plugins/hooks/lib/memory-manager.js +673 -0
- package/plugins/hooks/lib/metrics-analyzer.js +489 -0
- package/plugins/hooks/lib/project-evolution/auto-fixer.js +511 -0
- package/plugins/hooks/lib/project-evolution/memory-manager.js +346 -0
- package/plugins/hooks/lib/project-evolution/pattern-detector.js +476 -0
- package/plugins/hooks/lib/project-evolution/semantic-indexer.js +480 -0
- package/plugins/hooks/lib/project-structure-detector.js +326 -0
- package/plugins/hooks/lib/rollback-tracker.js +346 -0
- package/plugins/hooks/lib/source-code-scanner.js +596 -0
- package/plugins/hooks/lib/technology-stack-detector.js +374 -0
- package/plugins/hooks/lib/test-failure-analyzer.js +375 -0
- package/plugins/hooks/lib/test-failure-fixer.js +268 -0
- package/plugins/hooks/lib/trace-context.js +277 -0
- package/plugins/hooks/lib/validation-patterns.js +415 -0
- package/plugins/hooks/memory-sync.js +171 -0
- package/plugins/hooks/pipeline-observer.js +413 -0
- package/plugins/hooks/scope-sentinel.js +204 -0
- package/plugins/hooks/trace-initialization.js +169 -0
- package/plugins/memory/templates/code-quality.yaml +149 -0
- package/plugins/memory/templates/multi-system.yaml +155 -0
- package/plugins/memory/templates/team-habits.yaml +119 -0
- package/plugins/memory/templates/testing.yaml +121 -0
- package/plugins/skills/README.en.md +47 -0
- package/plugins/skills/README.md +104 -0
- package/plugins/skills/benchmark-executor/README.md +93 -0
- package/plugins/skills/benchmark-executor/SKILL.md +647 -0
- package/plugins/skills/benchmark-generator/SKILL.md +349 -0
- package/plugins/skills/delivery-stage/SKILL.md +203 -0
- package/plugins/skills/design-stage/SKILL.md +216 -0
- package/plugins/skills/evolution-process/SKILL.md +291 -0
- package/plugins/skills/exploration-phase/SKILL.md +133 -0
- package/plugins/skills/implementation-stage/SKILL.md +179 -0
- package/plugins/skills/layer1-validation/SKILL.md +79 -0
- package/plugins/skills/pending-dashboard/SKILL.md +109 -0
- package/plugins/skills/project-evolution/SKILL.md +847 -0
- package/plugins/skills/requirement-stage/SKILL.md +183 -0
- package/plugins/skills/skill-forge/SKILL.md +223 -0
- package/plugins/skills/skill-forge/references/description-guide.md +92 -0
- package/plugins/skills/skill-forge/references/quality-rubric.md +104 -0
- package/plugins/skills/skill-forge/references/skill-template.md +106 -0
- package/plugins/skills/startup-guard/SKILL.md +38 -0
- package/plugins/skills/testing-stage/SKILL.md +195 -0
- package/scripts/cli/global-init.js +288 -0
- package/scripts/cli/global.js +324 -0
- package/scripts/cli/index.js +55 -0
- package/scripts/cli/init.js +382 -0
- package/scripts/cli/list.js +69 -0
- package/scripts/cli/org.js +340 -0
- package/scripts/cli/update.js +44 -0
- package/scripts/config/commands.config.js +145 -0
- package/scripts/config/hooks.config.js +197 -0
- package/scripts/evolution/evolution-router.js +273 -0
- package/scripts/evolution/evolution-signal-collector.js +307 -0
- package/scripts/evolution/knowledge-loader.js +346 -0
- package/scripts/evolution/marketplace.js +317 -0
- package/scripts/evolution/version-manager.js +371 -0
- package/scripts/install/agents.js +106 -0
- package/scripts/install/commands.js +133 -0
- package/scripts/install/constants.js +424 -0
- package/scripts/install/hook-logger.js +536 -0
- package/scripts/install/hooks.js +110 -0
- package/scripts/install/index.js +39 -0
- package/scripts/install/skills.js +95 -0
- package/scripts/postinstall.js +25 -0
- package/scripts/state.js +376 -0
|
@@ -0,0 +1,605 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmarks Evaluator
|
|
3
|
+
* 评测执行器 - 负责执行 benchmarks 评测,计算评分,生成报告
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const fs = require('fs');
|
|
7
|
+
const path = require('path');
|
|
8
|
+
const yaml = require('js-yaml');
|
|
9
|
+
|
|
10
|
+
/**
 * Benchmark evaluator: loads the benchmarks defined for a pipeline stage, runs
 * them through a deterministic check (Layer 1) and a review step (Layer 2),
 * scores the outcome and writes a JSON report per run.
 *
 * Benchmarks are read from `<projectRoot>/.autospec/benchmarks` and reports are
 * written to `<projectRoot>/.autospec/benchmarks/results`.
 */
class BenchmarkEvaluator {
  /**
   * @param {string} projectRoot - Root directory of the project under evaluation.
   */
  constructor(projectRoot) {
    this.projectRoot = projectRoot;
    this.resultsPath = path.join(projectRoot, '.autospec', 'benchmarks', 'results');
    this.benchmarksPath = path.join(projectRoot, '.autospec', 'benchmarks');
  }

  /**
   * Initialise the evaluator by making sure the results directory exists.
   *
   * @returns {BenchmarkEvaluator} This instance, to allow chaining.
   */
  init() {
    if (!fs.existsSync(this.resultsPath)) {
      fs.mkdirSync(this.resultsPath, { recursive: true });
    }
    return this;
  }

  /**
   * Load the benchmark test cases for a stage.
   *
   * Sources, in order:
   *  1. `auto-generated/`: only the file whose name sorts last among
   *     `<stage>-*.yaml` (presumably names embed a sortable timestamp — confirm).
   *  2. `custom/`: every `<stage>-*.yaml` file.
   *
   * Test cases sharing an `id` are de-duplicated (first occurrence wins). Test
   * cases without an `id` cannot be compared and are all kept; entries that
   * are not objects are dropped.
   *
   * @param {string} stage - Stage name used as the file-name prefix.
   * @returns {object[]} De-duplicated list of benchmark definitions.
   * @throws {Error} If a matching file cannot be read or parsed.
   */
  loadBenchmarks(stage) {
    const isStageFile = (f) => f.startsWith(`${stage}-`) && f.endsWith('.yaml');
    const benchmarks = [];

    // Auto-generated benchmarks: newest file only.
    const autoGenPath = path.join(this.benchmarksPath, 'auto-generated');
    if (fs.existsSync(autoGenPath)) {
      const files = fs.readdirSync(autoGenPath)
        .filter(isStageFile)
        .sort()
        .reverse(); // newest first

      if (files.length > 0) {
        benchmarks.push(...this._readTestCases(path.join(autoGenPath, files[0])));
      }
    }

    // Custom benchmarks: every matching file.
    const customPath = path.join(this.benchmarksPath, 'custom');
    if (fs.existsSync(customPath)) {
      for (const file of fs.readdirSync(customPath).filter(isStageFile)) {
        benchmarks.push(...this._readTestCases(path.join(customPath, file)));
      }
    }

    // De-duplicate by id.
    const seen = new Set();
    return benchmarks.filter((b) => {
      if (b === null || typeof b !== 'object') return false;
      // Without an id there is nothing to compare on, so keep the entry.
      if (b.id === undefined || b.id === null) return true;
      if (seen.has(b.id)) return false;
      seen.add(b.id);
      return true;
    });
  }

  /**
   * Read one YAML benchmark file and return its `test_cases` array.
   *
   * @param {string} filePath - Absolute path of the YAML file.
   * @returns {object[]} The `test_cases` list, or an empty array when the
   *   document has no array under that key.
   */
  _readTestCases(filePath) {
    const content = fs.readFileSync(filePath, 'utf8');
    // NOTE(review): with js-yaml < 4, `load` can instantiate arbitrary JS
    // types. If benchmark files can come from untrusted sources, confirm that
    // js-yaml >= 4 (or an explicit safe schema) is in use.
    const data = yaml.load(content);
    return Array.isArray(data?.test_cases) ? data.test_cases : [];
  }

  /**
   * Run every benchmark of a stage, build the report and save it to disk.
   *
   * @param {string} stage - Stage to evaluate.
   * @param {object} [options]
   * @param {string} [options.trigger] - What started the run; defaults to 'manual'.
   * @returns {Promise<object>} The generated report.
   */
  async evaluate(stage, options = {}) {
    const benchmarks = this.loadBenchmarks(stage);
    const results = [];
    const startTime = Date.now();

    // Benchmarks run one after another, in load order.
    for (const benchmark of benchmarks) {
      const result = await this.evaluateBenchmark(benchmark, stage);
      results.push(result);
    }

    // Aggregate score.
    const summary = this.calculateSummary(results);
    const duration = Date.now() - startTime;
    const sumOf = (key) => results.reduce((sum, r) => sum + (r[key] || 0), 0);

    // Build the report.
    const report = {
      run_id: this.generateRunId(),
      stage,
      timestamp: new Date().toISOString(),
      trigger: options.trigger || 'manual',
      summary: {
        ...summary,
        duration_ms: duration
      },
      results,
      recommendations: this.generateRecommendations(results),
      metrics: {
        total_duration_ms: duration,
        benchmarks_count: benchmarks.length,
        layer1_duration_ms: sumOf('layer1_duration_ms'),
        layer2_duration_ms: sumOf('layer2_duration_ms')
      }
    };

    // Persist the report.
    this.saveReport(report);

    return report;
  }

  /**
   * Run a single benchmark through Layer 1 and, when applicable, Layer 2.
   *
   * Any exception thrown by a layer is captured: the result gets status
   * 'error', score 0 and the exception message in `error`.
   *
   * @param {object} benchmark - Benchmark definition (`id`, `name`, `criteria`,
   *   optional `priority` and `force_layer2`).
   * @param {string} stage - Stage being evaluated.
   * @returns {Promise<object>} Result record for this benchmark.
   */
  async evaluateBenchmark(benchmark, stage) {
    const result = {
      benchmark_id: benchmark.id,
      name: benchmark.name,
      // Carried over so calculateSummary can weight the score by priority.
      priority: benchmark.priority,
      status: 'pending',
      score: 0,
      layer1_result: null,
      layer2_result: null,
      issues: [],
      layer1_duration_ms: 0,
      layer2_duration_ms: 0
    };

    try {
      // Step 1: Layer 1 deterministic validation.
      const layer1Start = Date.now();
      result.layer1_result = await this.executeLayer1(benchmark, stage);
      result.layer1_duration_ms = Date.now() - layer1Start;

      // Step 2: Layer 2 review, when Layer 1 passed or the benchmark forces it.
      if (result.layer1_result.status === 'passed' || benchmark.force_layer2) {
        const layer2Start = Date.now();
        result.layer2_result = await this.executeLayer2(benchmark, stage);
        result.layer2_duration_ms = Date.now() - layer2Start;
      }

      // Final status. `??` is used so that a genuine score of 0 is kept
      // instead of being replaced by the fallback value.
      if (result.layer1_result.status === 'failed') {
        result.status = 'failed';
        result.score = result.layer1_result.score ?? 0;
      } else if (result.layer2_result && result.layer2_result.status === 'failed') {
        result.status = 'failed';
        result.score = result.layer2_result.score ?? 50;
      } else {
        result.status = 'passed';
        result.score = result.layer2_result?.score ?? result.layer1_result.score ?? 100;
      }

      // Collect the issues found by both layers.
      result.issues = this.collectIssues(result);

    } catch (error) {
      result.status = 'error';
      result.error = error.message;
      result.score = 0;
    }

    return result;
  }

  /**
   * Layer 1: evaluate every criterion of the benchmark.
   *
   * The status is 'failed' as soon as one criterion fails. The score is the
   * percentage of criteria that passed, or 100 when there are no criteria.
   *
   * @param {object} benchmark - Benchmark definition.
   * @param {string} stage - Stage being evaluated.
   * @returns {Promise<{status: string, score: number, checks: object[], details: object}>}
   */
  async executeLayer1(benchmark, stage) {
    const result = {
      status: 'passed',
      score: 100,
      checks: [],
      details: {}
    };

    const criteria = benchmark.criteria || [];
    let passedCount = 0;

    for (const criterion of criteria) {
      const checkResult = await this.evaluateCriterion(criterion, stage);
      result.checks.push(checkResult);

      if (checkResult.passed) {
        passedCount++;
      } else {
        result.status = 'failed';
      }
    }

    // Layer 1 score.
    if (criteria.length > 0) {
      result.score = (passedCount / criteria.length) * 100;
    }

    return result;
  }

  /**
   * Evaluate one criterion.
   *
   * String criteria:
   *  - containing `>=`: `<field> >= <number>`, compared against getFieldValue();
   *  - containing `存在`: the text after that keyword is checked as a path
   *    relative to projectRoot;
   *  - anything else passes by default and is flagged `needsLayer2`.
   * Object criteria are delegated to evaluateObjectCriterion(). Any other
   * value (including null) results in a failed check.
   *
   * @param {string|object} criterion - Criterion to evaluate.
   * @param {string} stage - Stage being evaluated.
   * @returns {Promise<object>} Check record (`criterion`, `passed`, `actual`,
   *   `expected`, optional `needsLayer2`).
   */
  async evaluateCriterion(criterion, stage) {
    const check = {
      criterion,
      passed: false,
      actual: null,
      expected: null
    };

    if (typeof criterion === 'string') {
      if (criterion.includes('>=')) {
        // Numeric comparison, e.g. "澄清问题数量 >= 3". Decimal thresholds are
        // accepted. A criterion that does not match stays failed.
        const match = criterion.match(/(.+)>=\s*(\d+(?:\.\d+)?)/);
        if (match) {
          const [, field, threshold] = match;
          check.actual = await this.getFieldValue(field.trim(), stage);
          check.expected = Number.parseFloat(threshold);
          check.passed = check.actual >= check.expected;
        }
      } else if (criterion.includes('存在')) {
        // File existence check.
        // NOTE(review): when nothing follows '存在' the pattern is empty and
        // resolves to projectRoot itself, so the check passes trivially.
        // Confirm the expected wording of these criteria.
        const filePattern = criterion.replace(/.*存在/, '').trim();
        check.actual = await this.checkFileExists(filePattern);
        check.expected = true;
        check.passed = check.actual === true;
      } else {
        // Other free-text criteria cannot be checked here: pass by default and
        // flag them for Layer 2.
        check.passed = true;
        check.needsLayer2 = true;
      }
    } else if (criterion !== null && typeof criterion === 'object') {
      // Structured criterion.
      check.passed = await this.evaluateObjectCriterion(criterion, stage);
    }

    return check;
  }

  /**
   * Resolve the current value of a named metric.
   *
   * @param {string} field - Metric name as written in the criterion.
   * @param {string} stage - Stage being evaluated.
   * @returns {Promise<*>} The metric value, or 0 for an unknown metric name.
   */
  async getFieldValue(field, stage) {
    // A Map is used so that only the names listed here can match; a plain
    // object would also match inherited keys such as "toString".
    const fieldGetters = new Map([
      ['澄清问题数量', () => this.countClarificationQuestions()],
      ['字段完整率', () => this.calculateFieldCompleteness(stage)],
      ['测试覆盖率', () => this.getTestCoverage()],
      ['复杂度', () => this.getCodeComplexity()],
      ['行覆盖率', () => this.getLineCoverage()],
      ['分支覆盖率', () => this.getBranchCoverage()]
    ]);

    const getter = fieldGetters.get(field);
    if (getter) {
      return await getter();
    }

    return 0;
  }

  /**
   * Check whether a path relative to projectRoot exists.
   *
   * @param {string} pattern - Relative path (used literally, no globbing).
   * @returns {Promise<boolean>}
   */
  async checkFileExists(pattern) {
    const fullPath = path.join(this.projectRoot, pattern);
    return fs.existsSync(fullPath);
  }

  /**
   * Evaluate a structured criterion according to `criterion.type`.
   *
   * Supported types: 'file-exists' (`path`), 'field-exists' (`file`, `field`)
   * and 'schema-valid' (`file`, `schema`). Unknown types return true so they
   * do not fail Layer 1.
   *
   * @param {object} criterion - Structured criterion.
   * @param {string} stage - Stage being evaluated (currently unused).
   * @returns {Promise<boolean>}
   */
  async evaluateObjectCriterion(criterion, stage) {
    switch (criterion.type) {
      case 'file-exists':
        return fs.existsSync(path.join(this.projectRoot, criterion.path));

      case 'field-exists':
        return this.checkFieldExists(criterion.file, criterion.field);

      case 'schema-valid':
        return this.validateSchema(criterion.file, criterion.schema);

      default:
        return true; // unknown type: left to Layer 2
    }
  }

  /**
   * Layer 2 review.
   *
   * Placeholder: always returns a passing result with a fixed score of 85.
   * A real implementation is expected to call an AI agent.
   *
   * @param {object} benchmark - Benchmark definition (currently unused).
   * @param {string} stage - Stage being evaluated (currently unused).
   * @returns {Promise<object>}
   */
  async executeLayer2(benchmark, stage) {
    return {
      status: 'passed',
      score: 85,
      findings: [],
      reviewer: 'ai-agent',
      reviewed_at: new Date().toISOString()
    };
  }

  /**
   * Aggregate the per-benchmark results.
   *
   * The overall score is a weighted average of the result scores, with
   * weights P0 = 3, P1 = 2 and anything else = 1.
   *
   * @param {object[]} results - Results from evaluateBenchmark().
   * @returns {object} `total`, `passed`, `failed`, `errors`, `overall_score`
   *   (rounded), `grade` and `pass_rate`. Note that `pass_rate` is a
   *   two-decimal string when there are results and the number 0 otherwise.
   */
  calculateSummary(results) {
    const total = results.length;
    const passed = results.filter(r => r.status === 'passed').length;
    const failed = results.filter(r => r.status === 'failed').length;
    const errors = results.filter(r => r.status === 'error').length;

    // Weighted average score.
    let totalWeight = 0;
    let weightedScore = 0;

    for (const result of results) {
      const weight = result.priority === 'P0' ? 3 :
        result.priority === 'P1' ? 2 : 1;
      weightedScore += result.score * weight;
      totalWeight += weight;
    }

    const overallScore = totalWeight > 0 ? weightedScore / totalWeight : 0;

    const grade = this.calculateGrade(overallScore);

    return {
      total,
      passed,
      failed,
      errors,
      overall_score: Math.round(overallScore),
      grade,
      pass_rate: total > 0 ? (passed / total * 100).toFixed(2) : 0
    };
  }

  /**
   * Map a numeric score to a letter grade.
   *
   * @param {number} score
   * @returns {string} 'A+' (>= 95), 'A' (>= 90), 'B+' (>= 85), 'B' (>= 80),
   *   'C' (>= 70) or 'D'.
   */
  calculateGrade(score) {
    if (score >= 95) return 'A+';
    if (score >= 90) return 'A';
    if (score >= 85) return 'B+';
    if (score >= 80) return 'B';
    if (score >= 70) return 'C';
    return 'D';
  }

  /**
   * Build one recommendation per failed or errored result, sorted P0 first.
   *
   * @param {object[]} results - Results from evaluateBenchmark().
   * @returns {object[]} Recommendations.
   */
  generateRecommendations(results) {
    const recommendations = [];

    for (const result of results) {
      if (result.status === 'failed' || result.status === 'error') {
        const rec = {
          id: `rec-${result.benchmark_id}`,
          benchmark_id: result.benchmark_id,
          benchmark_name: result.name,
          priority: this.inferPriority(result),
          issue: this.describeIssue(result),
          suggestion: this.generateSuggestion(result),
          auto_fixable: this.isAutoFixable(result)
        };
        recommendations.push(rec);
      }
    }

    // Sort by priority.
    const priorityOrder = { P0: 0, P1: 1, P2: 2 };
    recommendations.sort((a, b) => priorityOrder[a.priority] - priorityOrder[b.priority]);

    return recommendations;
  }

  /**
   * Infer a recommendation priority from the benchmark id or the score.
   *
   * @param {object} result - Result from evaluateBenchmark().
   * @returns {'P0'|'P1'|'P2'} 'P0' for ids starting with 'EXP-' or scores
   *   below 50, 'P1' for scores below 70, otherwise 'P2'.
   */
  inferPriority(result) {
    if (result.benchmark_id?.startsWith('EXP-')) return 'P0';
    if (result.score < 50) return 'P0';
    if (result.score < 70) return 'P1';
    return 'P2';
  }

  /**
   * Produce a short description of what went wrong for a result.
   *
   * @param {object} result - Result from evaluateBenchmark().
   * @returns {string} The captured error message for errored runs, the failed
   *   Layer 1 criteria joined by '; ', or a fixed message otherwise.
   */
  describeIssue(result) {
    // An errored run carries the exception message; report that rather than
    // the generic fallback.
    if (result.status === 'error' && result.error) {
      return result.error;
    }
    if (result.layer1_result?.status === 'failed') {
      const failedChecks = result.layer1_result.checks.filter(c => !c.passed);
      // Structured criteria are serialised; plain string concatenation would
      // render them as "[object Object]".
      return failedChecks
        .map(c => (typeof c.criterion === 'string' ? c.criterion : JSON.stringify(c.criterion)))
        .join('; ');
    }
    if (result.layer2_result?.status === 'failed') {
      return 'AI 审查未通过';
    }
    return '未知问题';
  }

  /**
   * Look up the improvement suggestion for a benchmark id.
   *
   * @param {object} result - Result from evaluateBenchmark().
   * @returns {string} The suggestion for known ids, otherwise a generic one.
   */
  generateSuggestion(result) {
    const suggestions = {
      'EXP-001': '补充至少3个澄清问题,覆盖功能、非功能和边界条件',
      'REQ-001': '完善需求文档,确保所有必填字段完整',
      'DES-001': '补充架构设计文档,包含技术选型理由',
      'IMP-001': '确保代码实现符合设计文档',
      'IMP-002': '优化代码质量,确保测试覆盖率达标',
      'TST-001': '补充测试用例,提高代码覆盖率'
    };

    return suggestions[result.benchmark_id] || '请参考 benchmark 要求完善产出物';
  }

  /**
   * Tell whether the issue is considered auto-fixable.
   *
   * @param {object} result - Result from evaluateBenchmark().
   * @returns {boolean} True when the benchmark id contains 'format', 'lint'
   *   or 'import'.
   */
  isAutoFixable(result) {
    const autoFixableBenchmarks = ['format', 'lint', 'import'];
    return autoFixableBenchmarks.some(id => result.benchmark_id?.includes(id));
  }

  /**
   * Flatten failed Layer 1 checks and Layer 2 findings into one issue list.
   *
   * @param {object} result - Result holding `layer1_result` / `layer2_result`.
   * @returns {object[]} Issues, each tagged with its `layer`.
   */
  collectIssues(result) {
    const issues = [];

    if (result.layer1_result?.checks) {
      for (const check of result.layer1_result.checks) {
        if (!check.passed) {
          issues.push({
            layer: 1,
            type: 'criterion-failed',
            description: check.criterion,
            actual: check.actual,
            expected: check.expected
          });
        }
      }
    }

    if (result.layer2_result?.findings) {
      for (const finding of result.layer2_result.findings) {
        issues.push({
          layer: 2,
          type: finding.type,
          description: finding.description,
          severity: finding.severity
        });
      }
    }

    return issues;
  }

  /**
   * Write a report to `<resultsPath>/<stage>-<run_id>.json`.
   *
   * @param {object} report - Report with `stage` and `run_id`.
   * @returns {string} Path of the written file.
   */
  saveReport(report) {
    const filename = `${report.stage}-${report.run_id}.json`;
    const filepath = path.join(this.resultsPath, filename);
    // Create the directory here as well so saving works without init().
    fs.mkdirSync(this.resultsPath, { recursive: true });
    fs.writeFileSync(filepath, JSON.stringify(report, null, 2));
    return filepath;
  }

  /**
   * Generate a run id: the current ISO timestamp with ':' and '.' replaced by
   * '-', followed by a short random base-36 suffix.
   *
   * @returns {string}
   */
  generateRunId() {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const random = Math.random().toString(36).slice(2, 8);
    return `${timestamp}-${random}`;
  }

  /**
   * List the report file names of a stage, last-sorting name first.
   *
   * @param {string} stage - Stage name used as the file-name prefix.
   * @returns {string[]} File names, or an empty array when the results
   *   directory does not exist.
   */
  _listReportFiles(stage) {
    if (!fs.existsSync(this.resultsPath)) {
      return [];
    }
    return fs.readdirSync(this.resultsPath)
      .filter(f => f.startsWith(`${stage}-`) && f.endsWith('.json'))
      .sort()
      .reverse();
  }

  /**
   * Read the most recent report of a stage.
   *
   * @param {string} stage
   * @returns {object|null} The parsed report, or null when there is none.
   */
  getLatestReport(stage) {
    const files = this._listReportFiles(stage);

    if (files.length === 0) {
      return null;
    }

    const content = fs.readFileSync(path.join(this.resultsPath, files[0]), 'utf8');
    return JSON.parse(content);
  }

  /**
   * Return the score history of a stage, oldest first.
   *
   * @param {string} stage
   * @param {number} [limit=10] - Maximum number of most recent reports to use.
   * @returns {Array<{timestamp: string, score: number, grade: string, pass_rate: *}>}
   */
  getTrend(stage, limit = 10) {
    const files = this._listReportFiles(stage).slice(0, limit);

    return files.map(f => {
      const content = fs.readFileSync(path.join(this.resultsPath, f), 'utf8');
      const report = JSON.parse(content);
      return {
        timestamp: report.timestamp,
        score: report.summary.overall_score,
        grade: report.summary.grade,
        pass_rate: report.summary.pass_rate
      };
    }).reverse();
  }

  // Metric helpers (several are placeholders to be implemented per project).

  /**
   * Count the clarification questions in `specs/clarifications.md`, i.e. the
   * lines of the form `- **Q<number>:`.
   *
   * @returns {number} The count, or 0 when the file is missing or unreadable.
   */
  countClarificationQuestions() {
    try {
      const filePath = path.join(this.projectRoot, 'specs', 'clarifications.md');
      if (!fs.existsSync(filePath)) return 0;

      const content = fs.readFileSync(filePath, 'utf8');
      const matches = content.match(/^\s*-\s*\*\*Q\d+:/gm);
      return matches ? matches.length : 0;
    } catch {
      return 0;
    }
  }

  /**
   * Field completeness rate. Placeholder: always returns 85.
   *
   * @param {string} stage - Currently unused.
   * @returns {number}
   */
  calculateFieldCompleteness(stage) {
    return 85; // placeholder
  }

  /**
   * Read `total.<metric>.pct` from `coverage/coverage-summary.json`.
   *
   * @param {string} metric - Key under `total`, e.g. 'lines' or 'branches'.
   * @returns {*} The value, or 0 when it is missing/falsy or the file cannot
   *   be read or parsed (best effort by design).
   */
  _readCoveragePct(metric) {
    try {
      const coveragePath = path.join(this.projectRoot, 'coverage', 'coverage-summary.json');
      if (!fs.existsSync(coveragePath)) return 0;

      const data = JSON.parse(fs.readFileSync(coveragePath, 'utf8'));
      return data.total?.[metric]?.pct || 0;
    } catch {
      return 0;
    }
  }

  /**
   * Test coverage, taken from the line coverage of the coverage summary.
   *
   * @returns {*} See _readCoveragePct().
   */
  getTestCoverage() {
    return this._readCoveragePct('lines');
  }

  /**
   * Code complexity. Placeholder: always returns 8.
   *
   * @returns {number}
   */
  getCodeComplexity() {
    return 8; // placeholder
  }

  /**
   * Line coverage; same value as getTestCoverage().
   *
   * @returns {*}
   */
  getLineCoverage() {
    return this.getTestCoverage();
  }

  /**
   * Branch coverage from the coverage summary.
   *
   * @returns {*} See _readCoveragePct().
   */
  getBranchCoverage() {
    return this._readCoveragePct('branches');
  }

  /**
   * Check whether a file contains the given text.
   *
   * @param {string} file - Path relative to projectRoot.
   * @param {string} field - Text to look for (plain substring match).
   * @returns {boolean} False when the file is missing or any error occurs.
   */
  checkFieldExists(file, field) {
    try {
      const filePath = path.join(this.projectRoot, file);
      if (!fs.existsSync(filePath)) return false;

      const content = fs.readFileSync(filePath, 'utf8');
      return content.includes(field);
    } catch {
      return false;
    }
  }

  /**
   * Validate a file against a schema. Placeholder: always returns true.
   *
   * @param {string} file - Currently unused.
   * @param {*} schema - Currently unused.
   * @returns {boolean}
   */
  validateSchema(file, schema) {
    return true; // placeholder
  }
}
|
|
604
|
+
|
|
605
|
+
// Expose the evaluator class as this CommonJS module's export.
module.exports = BenchmarkEvaluator;
|