@chongyan/autospec 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243) hide show
  1. package/LICENSE +21 -0
  2. package/README.en.md +472 -0
  3. package/README.md +476 -0
  4. package/bin/autospec.js +3 -0
  5. package/knowledge/README.md +144 -0
  6. package/knowledge/checklists/code.md +182 -0
  7. package/knowledge/checklists/design.md +196 -0
  8. package/knowledge/checklists/release.md +70 -0
  9. package/knowledge/checklists/requirement.md +169 -0
  10. package/knowledge/checklists/test.md +46 -0
  11. package/knowledge/config/README.en.md +44 -0
  12. package/knowledge/config/README.md +44 -0
  13. package/knowledge/config/role-composition.yaml +98 -0
  14. package/knowledge/config/role-extensions.yaml +140 -0
  15. package/knowledge/config/skill-compositions.yaml +142 -0
  16. package/knowledge/config/team-stage.yaml +95 -0
  17. package/knowledge/config/team-tasks.yaml +139 -0
  18. package/knowledge/config/team-triggers.yaml +198 -0
  19. package/knowledge/config/validation-patterns.yaml +137 -0
  20. package/knowledge/domain/README.md +115 -0
  21. package/knowledge/domain/flows/README.md +194 -0
  22. package/knowledge/domain/glossary.md +143 -0
  23. package/knowledge/domain/rules.md +138 -0
  24. package/knowledge/environment/README.en.md +36 -0
  25. package/knowledge/environment/README.md +87 -0
  26. package/knowledge/environment/component-knowledge.md +316 -0
  27. package/knowledge/environment/detection-patterns.yaml +502 -0
  28. package/knowledge/environment/middleware-knowledge.md +237 -0
  29. package/knowledge/environment/template-registry.md +321 -0
  30. package/knowledge/guides/domain-driven-design.md +345 -0
  31. package/knowledge/guides/knowledge-management.md +369 -0
  32. package/knowledge/guides/requirement-engineering.md +329 -0
  33. package/knowledge/guides/stages/ai-effect-evaluator.md +93 -0
  34. package/knowledge/guides/stages/code-implementer.md +205 -0
  35. package/knowledge/guides/stages/code-reviewer.md +111 -0
  36. package/knowledge/guides/stages/consistency-checker.md +177 -0
  37. package/knowledge/guides/stages/design-planner.md +401 -0
  38. package/knowledge/guides/stages/design-reviewer.md +83 -0
  39. package/knowledge/guides/stages/integration-test-runner.md +105 -0
  40. package/knowledge/guides/stages/release-checker.md +205 -0
  41. package/knowledge/guides/stages/requirement-analyzer.md +195 -0
  42. package/knowledge/guides/stages/requirement-reviewer.md +83 -0
  43. package/knowledge/guides/stages/security-reviewer.md +89 -0
  44. package/knowledge/guides/stages/test-context-analyzer.md +250 -0
  45. package/knowledge/guides/stages/test-generator.md +241 -0
  46. package/knowledge/guides/stages/test-planner.md +183 -0
  47. package/knowledge/guides/stages/test-reviewer.md +76 -0
  48. package/knowledge/guides/stages/unit-test-runner.md +83 -0
  49. package/knowledge/guides/support/ai-agent-analyzer.md +362 -0
  50. package/knowledge/guides/support/ai-anomaly-analyzer.md +213 -0
  51. package/knowledge/guides/support/ai-artifact-evaluator.md +192 -0
  52. package/knowledge/guides/support/ai-capability-analyzer.md +193 -0
  53. package/knowledge/guides/support/ai-component-analyzer.md +169 -0
  54. package/knowledge/guides/support/ai-data-validator.md +276 -0
  55. package/knowledge/guides/support/ai-evaluation-planner.md +374 -0
  56. package/knowledge/guides/support/ai-path-evaluator.md +274 -0
  57. package/knowledge/guides/support/ai-pipeline-evaluator.md +219 -0
  58. package/knowledge/guides/support/ai-rag-analyzer.md +339 -0
  59. package/knowledge/guides/support/ai-task-assessor.md +418 -0
  60. package/knowledge/guides/support/ai-test-diagnostics.md +133 -0
  61. package/knowledge/guides/support/complexity-assessor.md +268 -0
  62. package/knowledge/guides/support/component-discovery.md +183 -0
  63. package/knowledge/guides/support/environment-scanner.md +207 -0
  64. package/knowledge/guides/support/environment-validator.md +207 -0
  65. package/knowledge/guides/support/knowledge-generator.md +234 -0
  66. package/knowledge/guides/support/methodology-extractor.md +55 -0
  67. package/knowledge/guides/support/pipeline-protocol.md +438 -0
  68. package/knowledge/guides/support/practice-logger.md +359 -0
  69. package/knowledge/guides/support/scope-inference.md +174 -0
  70. package/knowledge/guides/support/skill-distiller.md +91 -0
  71. package/knowledge/guides/support/skill-updater.md +45 -0
  72. package/knowledge/guides/support/skill-validator.md +72 -0
  73. package/knowledge/guides/support/team-orchestrator.md +323 -0
  74. package/knowledge/guides/support/tech-stack-analyzer.md +139 -0
  75. package/knowledge/guides/support/test-runner.md +254 -0
  76. package/knowledge/guides/system-design.md +352 -0
  77. package/knowledge/organization/ai-native-team.md +318 -0
  78. package/knowledge/organization/team-metrics.md +228 -0
  79. package/knowledge/principles/constitution.md +134 -0
  80. package/knowledge/principles/core-principles.md +368 -0
  81. package/knowledge/principles/design-philosophy.md +877 -0
  82. package/knowledge/principles/evolution.md +553 -0
  83. package/knowledge/process/01-requirement.md +113 -0
  84. package/knowledge/process/02-design.md +123 -0
  85. package/knowledge/process/03-implementation.md +90 -0
  86. package/knowledge/process/04-review.md +80 -0
  87. package/knowledge/process/05-testing.md +90 -0
  88. package/knowledge/process/06-delivery.md +88 -0
  89. package/knowledge/process/README.en.md +38 -0
  90. package/knowledge/process/README.md +48 -0
  91. package/knowledge/process/ai-sdlc.md +475 -0
  92. package/knowledge/process/overview.md +319 -0
  93. package/knowledge/standards/code-review.md +876 -0
  94. package/knowledge/standards/coding-style.md +940 -0
  95. package/knowledge/standards/data-consistency.md +1085 -0
  96. package/knowledge/standards/document-versioning.md +210 -0
  97. package/knowledge/standards/risk-detection.md +186 -0
  98. package/knowledge/templates/ai-evaluation.md +150 -0
  99. package/knowledge/templates/api-design.md +117 -0
  100. package/knowledge/templates/database-design.md +132 -0
  101. package/knowledge/templates/domain-driven-design.md +321 -0
  102. package/knowledge/templates/product-proposal.md +201 -0
  103. package/knowledge/templates/system-design.md +227 -0
  104. package/knowledge/templates/task-breakdown.md +107 -0
  105. package/knowledge/templates/test-case.md +170 -0
  106. package/package.json +53 -0
  107. package/plugins/.claude-plugin/plugin.json +134 -0
  108. package/plugins/agents/roles/ai-engineer.md +129 -0
  109. package/plugins/agents/roles/backend-engineer.md +165 -0
  110. package/plugins/agents/roles/ceo.md +94 -0
  111. package/plugins/agents/roles/data-engineer.md +135 -0
  112. package/plugins/agents/roles/devops-engineer.md +181 -0
  113. package/plugins/agents/roles/frontend-engineer.md +129 -0
  114. package/plugins/agents/roles/product-owner.md +98 -0
  115. package/plugins/agents/roles/quality-engineer.md +129 -0
  116. package/plugins/agents/roles/security-engineer.md +180 -0
  117. package/plugins/agents/roles/tech-lead.md +97 -0
  118. package/plugins/agents/support/blind-comparator.md +88 -0
  119. package/plugins/agents/support/consistency-checker.md +103 -0
  120. package/plugins/agents/support/failure-diagnostician.md +141 -0
  121. package/plugins/agents/support/independent-reviewer.md +80 -0
  122. package/plugins/agents/support/safety-auditor.md +121 -0
  123. package/plugins/agents/support/skill-benchmarker.md +86 -0
  124. package/plugins/agents/support/skill-forger.md +105 -0
  125. package/plugins/agents/support/stage-gate-evaluator.md +121 -0
  126. package/plugins/agents/support/test-coverage-reviewer.md +73 -0
  127. package/plugins/benchmarks/templates/README.md +44 -0
  128. package/plugins/benchmarks/templates/commands/explore-template.yaml +48 -0
  129. package/plugins/benchmarks/templates/pipeline/agile-template.yaml +84 -0
  130. package/plugins/benchmarks/templates/pipeline/waterfall-template.yaml +106 -0
  131. package/plugins/benchmarks/templates/skills/requirement-analyzer-template.yaml +48 -0
  132. package/plugins/commands/README.en.md +96 -0
  133. package/plugins/commands/README.md +96 -0
  134. package/plugins/commands/apply.md +191 -0
  135. package/plugins/commands/archive.md +76 -0
  136. package/plugins/commands/env-export.md +79 -0
  137. package/plugins/commands/env-sync.md +640 -0
  138. package/plugins/commands/env-template.md +223 -0
  139. package/plugins/commands/env-update.md +264 -0
  140. package/plugins/commands/env-validate.md +176 -0
  141. package/plugins/commands/env.md +79 -0
  142. package/plugins/commands/explore.md +76 -0
  143. package/plugins/commands/field-evolve.md +536 -0
  144. package/plugins/commands/memory.md +249 -0
  145. package/plugins/commands/project-evolve.md +821 -0
  146. package/plugins/commands/propose.md +93 -0
  147. package/plugins/commands/review.md +140 -0
  148. package/plugins/commands/run.md +224 -0
  149. package/plugins/commands/status.md +62 -0
  150. package/plugins/commands/validate.md +108 -0
  151. package/plugins/hooks/README.en.md +56 -0
  152. package/plugins/hooks/README.md +56 -0
  153. package/plugins/hooks/ai-project-guard.js +329 -0
  154. package/plugins/hooks/artifact-evaluation-hook.js +237 -0
  155. package/plugins/hooks/constitution-guard.js +211 -0
  156. package/plugins/hooks/environment-autocommit.js +264 -0
  157. package/plugins/hooks/environment-manager.js +778 -0
  158. package/plugins/hooks/execution-tracker.js +354 -0
  159. package/plugins/hooks/frozen-zone-guard.js +140 -0
  160. package/plugins/hooks/layer1-validator.js +423 -0
  161. package/plugins/hooks/lib/artifact-evaluator.js +414 -0
  162. package/plugins/hooks/lib/benchmarks/change-detector.js +390 -0
  163. package/plugins/hooks/lib/benchmarks/evaluator.js +605 -0
  164. package/plugins/hooks/lib/benchmarks/integration-example.js +169 -0
  165. package/plugins/hooks/lib/data-and-ai-detector.js +275 -0
  166. package/plugins/hooks/lib/detection-pattern-loader.js +865 -0
  167. package/plugins/hooks/lib/directory-discovery.js +395 -0
  168. package/plugins/hooks/lib/environment-config-loader.js +341 -0
  169. package/plugins/hooks/lib/environment-detector.js +553 -0
  170. package/plugins/hooks/lib/environment-evolver.js +564 -0
  171. package/plugins/hooks/lib/environment-registry.js +813 -0
  172. package/plugins/hooks/lib/execution-path.js +427 -0
  173. package/plugins/hooks/lib/hook-error-recorder.js +245 -0
  174. package/plugins/hooks/lib/hook-logger.js +538 -0
  175. package/plugins/hooks/lib/hook-runner.js +97 -0
  176. package/plugins/hooks/lib/hook-runner.sh +44 -0
  177. package/plugins/hooks/lib/hook-state-manager.js +480 -0
  178. package/plugins/hooks/lib/memory-extractor.js +377 -0
  179. package/plugins/hooks/lib/memory-manager.js +673 -0
  180. package/plugins/hooks/lib/metrics-analyzer.js +489 -0
  181. package/plugins/hooks/lib/project-evolution/auto-fixer.js +511 -0
  182. package/plugins/hooks/lib/project-evolution/memory-manager.js +346 -0
  183. package/plugins/hooks/lib/project-evolution/pattern-detector.js +476 -0
  184. package/plugins/hooks/lib/project-evolution/semantic-indexer.js +480 -0
  185. package/plugins/hooks/lib/project-structure-detector.js +326 -0
  186. package/plugins/hooks/lib/rollback-tracker.js +346 -0
  187. package/plugins/hooks/lib/source-code-scanner.js +596 -0
  188. package/plugins/hooks/lib/technology-stack-detector.js +374 -0
  189. package/plugins/hooks/lib/test-failure-analyzer.js +375 -0
  190. package/plugins/hooks/lib/test-failure-fixer.js +268 -0
  191. package/plugins/hooks/lib/trace-context.js +277 -0
  192. package/plugins/hooks/lib/validation-patterns.js +415 -0
  193. package/plugins/hooks/memory-sync.js +171 -0
  194. package/plugins/hooks/pipeline-observer.js +413 -0
  195. package/plugins/hooks/scope-sentinel.js +204 -0
  196. package/plugins/hooks/trace-initialization.js +169 -0
  197. package/plugins/memory/templates/code-quality.yaml +149 -0
  198. package/plugins/memory/templates/multi-system.yaml +155 -0
  199. package/plugins/memory/templates/team-habits.yaml +119 -0
  200. package/plugins/memory/templates/testing.yaml +121 -0
  201. package/plugins/skills/README.en.md +47 -0
  202. package/plugins/skills/README.md +104 -0
  203. package/plugins/skills/benchmark-executor/README.md +93 -0
  204. package/plugins/skills/benchmark-executor/SKILL.md +647 -0
  205. package/plugins/skills/benchmark-generator/SKILL.md +349 -0
  206. package/plugins/skills/delivery-stage/SKILL.md +203 -0
  207. package/plugins/skills/design-stage/SKILL.md +216 -0
  208. package/plugins/skills/evolution-process/SKILL.md +291 -0
  209. package/plugins/skills/exploration-phase/SKILL.md +133 -0
  210. package/plugins/skills/implementation-stage/SKILL.md +179 -0
  211. package/plugins/skills/layer1-validation/SKILL.md +79 -0
  212. package/plugins/skills/pending-dashboard/SKILL.md +109 -0
  213. package/plugins/skills/project-evolution/SKILL.md +847 -0
  214. package/plugins/skills/requirement-stage/SKILL.md +183 -0
  215. package/plugins/skills/skill-forge/SKILL.md +223 -0
  216. package/plugins/skills/skill-forge/references/description-guide.md +92 -0
  217. package/plugins/skills/skill-forge/references/quality-rubric.md +104 -0
  218. package/plugins/skills/skill-forge/references/skill-template.md +106 -0
  219. package/plugins/skills/startup-guard/SKILL.md +38 -0
  220. package/plugins/skills/testing-stage/SKILL.md +195 -0
  221. package/scripts/cli/global-init.js +288 -0
  222. package/scripts/cli/global.js +324 -0
  223. package/scripts/cli/index.js +55 -0
  224. package/scripts/cli/init.js +382 -0
  225. package/scripts/cli/list.js +69 -0
  226. package/scripts/cli/org.js +340 -0
  227. package/scripts/cli/update.js +44 -0
  228. package/scripts/config/commands.config.js +145 -0
  229. package/scripts/config/hooks.config.js +197 -0
  230. package/scripts/evolution/evolution-router.js +273 -0
  231. package/scripts/evolution/evolution-signal-collector.js +307 -0
  232. package/scripts/evolution/knowledge-loader.js +346 -0
  233. package/scripts/evolution/marketplace.js +317 -0
  234. package/scripts/evolution/version-manager.js +371 -0
  235. package/scripts/install/agents.js +106 -0
  236. package/scripts/install/commands.js +133 -0
  237. package/scripts/install/constants.js +424 -0
  238. package/scripts/install/hook-logger.js +536 -0
  239. package/scripts/install/hooks.js +110 -0
  240. package/scripts/install/index.js +39 -0
  241. package/scripts/install/skills.js +95 -0
  242. package/scripts/postinstall.js +25 -0
  243. package/scripts/state.js +376 -0
@@ -0,0 +1,605 @@
1
+ /**
2
+ * Benchmarks Evaluator
3
+ * 评测执行器 - 负责执行 benchmarks 评测,计算评分,生成报告
4
+ */
5
+
6
+ const fs = require('fs');
7
+ const path = require('path');
8
+ const yaml = require('js-yaml');
9
+
10
/**
 * Runs the benchmarks defined for a pipeline stage, scores them and persists
 * a JSON report.
 *
 * Each benchmark goes through two layers:
 *   - Layer 1: deterministic checks derived from the benchmark's `criteria`.
 *   - Layer 2: an AI review. In this file it is only a placeholder that
 *     always reports `passed` with a score of 85 (see `executeLayer2`).
 *
 * Benchmark definitions are read from `<projectRoot>/.autospec/benchmarks`
 * and reports are written to `<projectRoot>/.autospec/benchmarks/results`.
 */
class BenchmarkEvaluator {
  /**
   * @param {string} projectRoot - Root directory of the project under evaluation.
   */
  constructor(projectRoot) {
    this.projectRoot = projectRoot;
    this.resultsPath = path.join(projectRoot, '.autospec', 'benchmarks', 'results');
    this.benchmarksPath = path.join(projectRoot, '.autospec', 'benchmarks');
  }

  /**
   * Ensures the results directory exists.
   *
   * @returns {BenchmarkEvaluator} This instance, for chaining.
   */
  init() {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(this.resultsPath, { recursive: true });
    return this;
  }

  /**
   * Reads the `test_cases` list from one benchmark YAML file.
   *
   * @param {string} filePath - Absolute path of the YAML file.
   * @returns {Array} The `test_cases` array, or an empty array when the file
   *   has no array under that key.
   * @throws When the file cannot be read or is not valid YAML.
   */
  readTestCases(filePath) {
    // NOTE(review): `yaml.load` is js-yaml's loader; confirm the installed
    // major version restricts it to a safe schema before feeding it files
    // that do not come from the project itself.
    const data = yaml.load(fs.readFileSync(filePath, 'utf8'));
    return Array.isArray(data?.test_cases) ? data.test_cases : [];
  }

  /**
   * Loads the benchmarks for a stage.
   *
   * Sources, in order:
   *   1. `auto-generated/`: only the file whose name sorts last among
   *      `<stage>-*.yaml` (treated as the most recent one).
   *   2. `custom/`: every `<stage>-*.yaml` file.
   *
   * Entries sharing an `id` are de-duplicated, keeping the first occurrence
   * (so an auto-generated entry wins over a custom one with the same id).
   * Entries without an `id` are all kept, and entries that are not objects
   * are dropped.
   *
   * @param {string} stage - Stage name used as the file-name prefix.
   * @returns {Array<object>} De-duplicated benchmark definitions.
   */
  loadBenchmarks(stage) {
    const prefix = `${stage}-`;
    const isStageYaml = (file) => file.startsWith(prefix) && file.endsWith('.yaml');
    const benchmarks = [];

    const autoGenPath = path.join(this.benchmarksPath, 'auto-generated');
    if (fs.existsSync(autoGenPath)) {
      const files = fs.readdirSync(autoGenPath).filter(isStageYaml).sort().reverse();
      if (files.length > 0) {
        benchmarks.push(...this.readTestCases(path.join(autoGenPath, files[0])));
      }
    }

    const customPath = path.join(this.benchmarksPath, 'custom');
    if (fs.existsSync(customPath)) {
      for (const file of fs.readdirSync(customPath).filter(isStageYaml)) {
        benchmarks.push(...this.readTestCases(path.join(customPath, file)));
      }
    }

    const seen = new Set();
    return benchmarks.filter((benchmark) => {
      if (benchmark === null || typeof benchmark !== 'object') return false;
      // Without an id there is nothing to de-duplicate on; keep the entry
      // instead of collapsing every id-less benchmark into one.
      if (benchmark.id == null) return true;
      if (seen.has(benchmark.id)) return false;
      seen.add(benchmark.id);
      return true;
    });
  }

  /**
   * Evaluates every benchmark of a stage sequentially, builds the report and
   * writes it to the results directory.
   *
   * @param {string} stage - Stage to evaluate.
   * @param {object} [options]
   * @param {string} [options.trigger='manual'] - What started this run.
   * @returns {Promise<object>} The saved report.
   */
  async evaluate(stage, options = {}) {
    const benchmarks = this.loadBenchmarks(stage);
    const results = [];
    const startTime = Date.now();

    for (const benchmark of benchmarks) {
      results.push(await this.evaluateBenchmark(benchmark, stage));
    }

    const summary = this.calculateSummary(results);
    const duration = Date.now() - startTime;
    const sumOf = (key) => results.reduce((sum, r) => sum + (r[key] || 0), 0);

    const report = {
      run_id: this.generateRunId(),
      stage,
      timestamp: new Date().toISOString(),
      trigger: options?.trigger || 'manual',
      summary: {
        ...summary,
        duration_ms: duration
      },
      results,
      recommendations: this.generateRecommendations(results),
      metrics: {
        total_duration_ms: duration,
        benchmarks_count: benchmarks.length,
        layer1_duration_ms: sumOf('layer1_duration_ms'),
        layer2_duration_ms: sumOf('layer2_duration_ms')
      }
    };

    this.saveReport(report);

    return report;
  }

  /**
   * Evaluates a single benchmark.
   *
   * Layer 2 runs only when Layer 1 passed or the benchmark sets
   * `force_layer2`. Any exception is captured on the result
   * (`status: 'error'`, `error`, `score: 0`) rather than propagated.
   *
   * @param {object} benchmark - Benchmark definition (`id`, `name`,
   *   `priority`, `criteria`, `force_layer2`).
   * @param {string} stage - Stage being evaluated.
   * @returns {Promise<object>} The benchmark result.
   */
  async evaluateBenchmark(benchmark, stage) {
    const result = {
      benchmark_id: benchmark.id,
      name: benchmark.name,
      // Carried over so calculateSummary can weight by priority; without it
      // every result silently got the default weight.
      priority: benchmark.priority,
      status: 'pending',
      score: 0,
      layer1_result: null,
      layer2_result: null,
      issues: [],
      layer1_duration_ms: 0,
      layer2_duration_ms: 0
    };

    try {
      const layer1Start = Date.now();
      result.layer1_result = await this.executeLayer1(benchmark, stage);
      result.layer1_duration_ms = Date.now() - layer1Start;

      if (result.layer1_result.status === 'passed' || benchmark.force_layer2) {
        const layer2Start = Date.now();
        result.layer2_result = await this.executeLayer2(benchmark, stage);
        result.layer2_duration_ms = Date.now() - layer2Start;
      }

      // `??` rather than `||`: a reported score of 0 is a real score and
      // must not be replaced by the fallback.
      if (result.layer1_result.status === 'failed') {
        result.status = 'failed';
        result.score = result.layer1_result.score ?? 0;
      } else if (result.layer2_result && result.layer2_result.status === 'failed') {
        result.status = 'failed';
        result.score = result.layer2_result.score ?? 50;
      } else {
        result.status = 'passed';
        result.score = result.layer2_result?.score ?? result.layer1_result.score ?? 100;
      }

      result.issues = this.collectIssues(result);
    } catch (error) {
      result.status = 'error';
      result.error = error.message;
      result.score = 0;
    }

    return result;
  }

  /**
   * Runs the deterministic checks of a benchmark.
   *
   * The score is the percentage of criteria that passed; a benchmark with no
   * criteria passes with a score of 100.
   *
   * @param {object} benchmark - Benchmark definition.
   * @param {string} stage - Stage being evaluated.
   * @returns {Promise<{status: string, score: number, checks: Array, details: object}>}
   */
  async executeLayer1(benchmark, stage) {
    const result = {
      status: 'passed',
      score: 100,
      checks: [],
      details: {}
    };

    const criteria = benchmark.criteria || [];
    let passedCount = 0;

    for (const criterion of criteria) {
      const checkResult = await this.evaluateCriterion(criterion, stage);
      result.checks.push(checkResult);

      if (checkResult.passed) {
        passedCount++;
      } else {
        result.status = 'failed';
      }
    }

    if (criteria.length > 0) {
      result.score = (passedCount / criteria.length) * 100;
    }

    return result;
  }

  /**
   * Evaluates one criterion.
   *
   * String criteria:
   *   - containing `>=`: "<field> >= <number>", compared against
   *     `getFieldValue(field)`; integer and decimal thresholds are accepted.
   *   - containing `存在`: the text after the marker is checked as a path
   *     relative to the project root.
   *   - anything else: passes and is flagged `needsLayer2`.
   * Object criteria are delegated to `evaluateObjectCriterion`.
   * Any other value (including `null`) fails.
   *
   * @param {string|object} criterion - Criterion to evaluate.
   * @param {string} stage - Stage being evaluated.
   * @returns {Promise<object>} `{ criterion, passed, actual, expected }`,
   *   plus `needsLayer2` when deferred.
   */
  async evaluateCriterion(criterion, stage) {
    const check = {
      criterion,
      passed: false,
      actual: null,
      expected: null
    };

    if (typeof criterion === 'string') {
      if (criterion.includes('>=')) {
        // Threshold comparison, e.g. "澄清问题数量 >= 3" or "行覆盖率 >= 80.5".
        const match = criterion.match(/(.+)>=\s*(\d+(?:\.\d+)?)/);
        if (match) {
          const [, field, threshold] = match;
          check.actual = await this.getFieldValue(field.trim(), stage);
          check.expected = Number.parseFloat(threshold);
          check.passed = check.actual >= check.expected;
        }
      } else if (criterion.includes('存在')) {
        // NOTE(review): when nothing follows the marker (e.g. "X 存在") the
        // pattern is empty, the project root itself is checked and the
        // criterion always passes — confirm the intended word order.
        const filePattern = criterion.replace(/.*存在/, '').trim();
        check.actual = await this.checkFileExists(filePattern);
        check.expected = true;
        check.passed = check.actual === true;
      } else {
        // Not machine-checkable here: pass and defer to Layer 2.
        check.passed = true;
        check.needsLayer2 = true;
      }
    } else if (criterion !== null && typeof criterion === 'object') {
      check.passed = await this.evaluateObjectCriterion(criterion, stage);
    }

    return check;
  }

  /**
   * Resolves the current value of a named metric.
   *
   * @param {string} field - Metric name as written in the criterion.
   * @param {string} stage - Stage being evaluated.
   * @returns {Promise<number>} The metric value, or 0 for an unknown name.
   */
  async getFieldValue(field, stage) {
    const fieldMappings = {
      '澄清问题数量': () => this.countClarificationQuestions(),
      '字段完整率': () => this.calculateFieldCompleteness(stage),
      '测试覆盖率': () => this.getTestCoverage(),
      '复杂度': () => this.getCodeComplexity(),
      '行覆盖率': () => this.getLineCoverage(),
      '分支覆盖率': () => this.getBranchCoverage()
    };

    // Own-property check so names such as "constructor" or "toString" do not
    // resolve to inherited Object.prototype members.
    if (Object.prototype.hasOwnProperty.call(fieldMappings, field)) {
      return await fieldMappings[field]();
    }

    return 0;
  }

  /**
   * Checks whether a path exists relative to the project root.
   *
   * @param {string} pattern - Path relative to the project root (used
   *   literally; no glob expansion is performed).
   * @returns {Promise<boolean>}
   */
  async checkFileExists(pattern) {
    const fullPath = path.join(this.projectRoot, pattern);
    return fs.existsSync(fullPath);
  }

  /**
   * Evaluates an object-form criterion according to `criterion.type`.
   *
   * @param {object} criterion - `{ type, path?, file?, field?, schema? }`.
   * @param {string} stage - Stage being evaluated (currently unused).
   * @returns {Promise<boolean>} Check outcome; unknown types return `true`
   *   so they are left to Layer 2.
   */
  async evaluateObjectCriterion(criterion, stage) {
    switch (criterion.type) {
      case 'file-exists':
        // A criterion without a usable path cannot be satisfied; fail it
        // rather than letting path.join throw.
        if (typeof criterion.path !== 'string') return false;
        return fs.existsSync(path.join(this.projectRoot, criterion.path));

      case 'field-exists':
        return this.checkFieldExists(criterion.file, criterion.field);

      case 'schema-valid':
        return this.validateSchema(criterion.file, criterion.schema);

      default:
        return true;
    }
  }

  /**
   * Layer 2 AI review.
   *
   * Placeholder: no agent is invoked; a fixed passing result with a score
   * of 85 is returned.
   *
   * @param {object} benchmark - Benchmark definition (currently unused).
   * @param {string} stage - Stage being evaluated (currently unused).
   * @returns {Promise<object>} `{ status, score, findings, reviewer, reviewed_at }`.
   */
  async executeLayer2(benchmark, stage) {
    return {
      status: 'passed',
      score: 85,
      findings: [],
      reviewer: 'ai-agent',
      reviewed_at: new Date().toISOString()
    };
  }

  /**
   * Aggregates benchmark results.
   *
   * The overall score is a weighted average: priority P0 weighs 3, P1
   * weighs 2 and everything else weighs 1.
   *
   * @param {Array<object>} results - Benchmark results.
   * @returns {{total: number, passed: number, failed: number, errors: number,
   *   overall_score: number, grade: string, pass_rate: string}}
   *   `pass_rate` is a percentage formatted with two decimals.
   */
  calculateSummary(results) {
    const total = results.length;
    const countByStatus = (status) => results.filter((r) => r.status === status).length;
    const passed = countByStatus('passed');

    let totalWeight = 0;
    let weightedScore = 0;

    for (const result of results) {
      const weight = result.priority === 'P0' ? 3 :
                     result.priority === 'P1' ? 2 : 1;
      weightedScore += result.score * weight;
      totalWeight += weight;
    }

    const overallScore = totalWeight > 0 ? weightedScore / totalWeight : 0;

    return {
      total,
      passed,
      failed: countByStatus('failed'),
      errors: countByStatus('error'),
      overall_score: Math.round(overallScore),
      grade: this.calculateGrade(overallScore),
      // Always a string, so the empty case has the same type as the rest.
      pass_rate: (total > 0 ? (passed / total) * 100 : 0).toFixed(2)
    };
  }

  /**
   * Maps a 0-100 score to a letter grade.
   *
   * @param {number} score
   * @returns {string} One of 'A+', 'A', 'B+', 'B', 'C', 'D'.
   */
  calculateGrade(score) {
    if (score >= 95) return 'A+';
    if (score >= 90) return 'A';
    if (score >= 85) return 'B+';
    if (score >= 80) return 'B';
    if (score >= 70) return 'C';
    return 'D';
  }

  /**
   * Builds one recommendation per failed or errored benchmark, ordered from
   * P0 to P2.
   *
   * @param {Array<object>} results - Benchmark results.
   * @returns {Array<object>} Recommendations.
   */
  generateRecommendations(results) {
    const recommendations = results
      .filter((result) => result.status === 'failed' || result.status === 'error')
      .map((result) => ({
        id: `rec-${result.benchmark_id}`,
        benchmark_id: result.benchmark_id,
        benchmark_name: result.name,
        priority: this.inferPriority(result),
        issue: this.describeIssue(result),
        suggestion: this.generateSuggestion(result),
        auto_fixable: this.isAutoFixable(result)
      }));

    const priorityOrder = { P0: 0, P1: 1, P2: 2 };
    recommendations.sort((a, b) => priorityOrder[a.priority] - priorityOrder[b.priority]);

    return recommendations;
  }

  /**
   * Infers a recommendation priority from the benchmark id and score.
   *
   * @param {object} result - Benchmark result.
   * @returns {string} 'P0' for ids starting with 'EXP-' or scores below 50,
   *   'P1' for scores below 70, otherwise 'P2'.
   */
  inferPriority(result) {
    if (result.benchmark_id?.startsWith('EXP-')) return 'P0';
    if (result.score < 50) return 'P0';
    if (result.score < 70) return 'P1';
    return 'P2';
  }

  /**
   * Produces a short description of why a benchmark did not pass.
   *
   * @param {object} result - Benchmark result.
   * @returns {string} The failed Layer 1 criteria joined with '; ', a
   *   Layer 2 failure notice, the captured error message, or a generic
   *   fallback.
   */
  describeIssue(result) {
    if (result.layer1_result?.status === 'failed') {
      return result.layer1_result.checks
        .filter((check) => !check.passed)
        // Object criteria would otherwise render as "[object Object]".
        .map((check) => (typeof check.criterion === 'string'
          ? check.criterion
          : JSON.stringify(check.criterion)))
        .join('; ');
    }
    if (result.layer2_result?.status === 'failed') {
      return 'AI 审查未通过';
    }
    if (result.status === 'error' && result.error) {
      return `评测执行出错: ${result.error}`;
    }
    return '未知问题';
  }

  /**
   * Looks up a canned improvement suggestion for a benchmark id.
   *
   * @param {object} result - Benchmark result.
   * @returns {string} The suggestion for known ids, otherwise a generic one.
   */
  generateSuggestion(result) {
    const suggestions = {
      'EXP-001': '补充至少3个澄清问题,覆盖功能、非功能和边界条件',
      'REQ-001': '完善需求文档,确保所有必填字段完整',
      'DES-001': '补充架构设计文档,包含技术选型理由',
      'IMP-001': '确保代码实现符合设计文档',
      'IMP-002': '优化代码质量,确保测试覆盖率达标',
      'TST-001': '补充测试用例,提高代码覆盖率'
    };

    return suggestions[result.benchmark_id] || '请参考 benchmark 要求完善产出物';
  }

  /**
   * Tells whether a failure is considered automatically fixable, based on
   * the benchmark id containing 'format', 'lint' or 'import'.
   *
   * @param {object} result - Benchmark result.
   * @returns {boolean}
   */
  isAutoFixable(result) {
    const autoFixableBenchmarks = ['format', 'lint', 'import'];
    return autoFixableBenchmarks.some((id) => result.benchmark_id?.includes(id));
  }

  /**
   * Flattens failed Layer 1 checks and Layer 2 findings into one issue list.
   *
   * @param {object} result - Benchmark result.
   * @returns {Array<object>} Issues tagged with `layer: 1` or `layer: 2`.
   */
  collectIssues(result) {
    const issues = [];

    for (const check of result.layer1_result?.checks ?? []) {
      if (!check.passed) {
        issues.push({
          layer: 1,
          type: 'criterion-failed',
          description: check.criterion,
          actual: check.actual,
          expected: check.expected
        });
      }
    }

    for (const finding of result.layer2_result?.findings ?? []) {
      issues.push({
        layer: 2,
        type: finding.type,
        description: finding.description,
        severity: finding.severity
      });
    }

    return issues;
  }

  /**
   * Writes a report as pretty-printed JSON to
   * `<resultsPath>/<stage>-<run_id>.json`.
   *
   * @param {object} report - Report containing `stage` and `run_id`.
   * @returns {string} Path of the written file.
   */
  saveReport(report) {
    // Create the directory here as well so saving works even when init()
    // was never called.
    fs.mkdirSync(this.resultsPath, { recursive: true });
    const filename = `${report.stage}-${report.run_id}.json`;
    const filepath = path.join(this.resultsPath, filename);
    fs.writeFileSync(filepath, JSON.stringify(report, null, 2));
    return filepath;
  }

  /**
   * Generates a run id: the current ISO timestamp with ':' and '.' replaced
   * by '-', followed by a short random base-36 suffix.
   *
   * @returns {string}
   */
  generateRunId() {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const random = Math.random().toString(36).slice(2, 8);
    return `${timestamp}-${random}`;
  }

  /**
   * Loads the saved reports of a stage, newest file name first.
   *
   * File names are only a first filter: `<stage>-` is also a prefix of the
   * files of any stage named `<stage>-something`, so each candidate is
   * confirmed through its own `stage` field.
   *
   * @param {string} stage - Stage name.
   * @param {number} limit - Maximum number of reports to return.
   * @returns {Array<object>} Parsed reports; empty when the results
   *   directory does not exist.
   * @throws When a candidate file cannot be read or is not valid JSON.
   */
  loadReports(stage, limit) {
    if (!fs.existsSync(this.resultsPath)) return [];

    const prefix = `${stage}-`;
    const files = fs.readdirSync(this.resultsPath)
      .filter((file) => file.startsWith(prefix) && file.endsWith('.json'))
      .sort()
      .reverse();

    const reports = [];
    for (const file of files) {
      if (reports.length >= limit) break;
      const report = JSON.parse(fs.readFileSync(path.join(this.resultsPath, file), 'utf8'));
      if (report && report.stage === stage) {
        reports.push(report);
      }
    }
    return reports;
  }

  /**
   * Returns the most recent saved report of a stage.
   *
   * @param {string} stage - Stage name.
   * @returns {object|null} The report, or `null` when none exists.
   */
  getLatestReport(stage) {
    const [latest] = this.loadReports(stage, 1);
    return latest ?? null;
  }

  /**
   * Returns score history for a stage, oldest first.
   *
   * @param {string} stage - Stage name.
   * @param {number} [limit=10] - Number of most recent reports to include.
   * @returns {Array<{timestamp: string, score: number, grade: string, pass_rate: *}>}
   */
  getTrend(stage, limit = 10) {
    return this.loadReports(stage, limit)
      .map((report) => ({
        timestamp: report.timestamp,
        score: report.summary.overall_score,
        grade: report.summary.grade,
        pass_rate: report.summary.pass_rate
      }))
      .reverse();
  }

  // ---- Metric helpers ----

  /**
   * Counts list items of the form "- **Q<n>:" in `specs/clarifications.md`.
   *
   * @returns {number} Number of matches; 0 when the file is missing or
   *   unreadable (best effort).
   */
  countClarificationQuestions() {
    try {
      const filePath = path.join(this.projectRoot, 'specs', 'clarifications.md');
      if (!fs.existsSync(filePath)) return 0;

      const content = fs.readFileSync(filePath, 'utf8');
      const matches = content.match(/^\s*-\s*\*\*Q\d+:/gm);
      return matches ? matches.length : 0;
    } catch {
      return 0;
    }
  }

  /**
   * Field completeness rate. Placeholder: always returns 85.
   *
   * @param {string} stage - Stage being evaluated (currently unused).
   * @returns {number}
   */
  calculateFieldCompleteness(stage) {
    return 85;
  }

  /**
   * Reads `total.<metric>.pct` from `coverage/coverage-summary.json`.
   *
   * @param {string} metric - Key under `total`, e.g. 'lines' or 'branches'.
   * @returns {number} The value, or 0 when the file is missing, unreadable
   *   or lacks the metric (best effort).
   */
  readCoveragePct(metric) {
    try {
      const coveragePath = path.join(this.projectRoot, 'coverage', 'coverage-summary.json');
      if (!fs.existsSync(coveragePath)) return 0;

      const data = JSON.parse(fs.readFileSync(coveragePath, 'utf8'));
      return data.total?.[metric]?.pct || 0;
    } catch {
      return 0;
    }
  }

  /**
   * Test coverage, taken from the line coverage of the coverage summary.
   *
   * @returns {number}
   */
  getTestCoverage() {
    return this.readCoveragePct('lines');
  }

  /**
   * Code complexity. Placeholder: always returns 8.
   *
   * @returns {number}
   */
  getCodeComplexity() {
    return 8;
  }

  /**
   * Line coverage; same value as `getTestCoverage`.
   *
   * @returns {number}
   */
  getLineCoverage() {
    return this.getTestCoverage();
  }

  /**
   * Branch coverage from the coverage summary.
   *
   * @returns {number}
   */
  getBranchCoverage() {
    return this.readCoveragePct('branches');
  }

  /**
   * Checks whether a file contains the given text. This is a plain
   * substring search, not a structural field lookup.
   *
   * @param {string} file - Path relative to the project root.
   * @param {string} field - Text to look for.
   * @returns {boolean} `false` when the field is empty or not a string, or
   *   when the file is missing or unreadable.
   */
  checkFieldExists(file, field) {
    // An empty needle matches every file, and a missing one would be
    // searched as the text "undefined"; neither is a meaningful check.
    if (typeof field !== 'string' || field === '') return false;
    try {
      const filePath = path.join(this.projectRoot, file);
      if (!fs.existsSync(filePath)) return false;

      return fs.readFileSync(filePath, 'utf8').includes(field);
    } catch {
      return false;
    }
  }

  /**
   * Schema validation. Placeholder: always returns `true`.
   *
   * @param {string} file - File to validate (currently unused).
   * @param {*} schema - Schema to validate against (currently unused).
   * @returns {boolean}
   */
  validateSchema(file, schema) {
    return true;
  }
}
604
+
605
+ module.exports = BenchmarkEvaluator;