@chongyan/autospec 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283)
  1. package/LICENSE +21 -21
  2. package/README.en.md +447 -321
  3. package/README.md +418 -286
  4. package/knowledge/01-principles/00-principles-hierarchy.md +247 -0
  5. package/knowledge/01-principles/01-first-principles.md +241 -0
  6. package/knowledge/01-principles/02-strategic-principles.md +286 -0
  7. package/knowledge/01-principles/03-tactical-principles.md +385 -0
  8. package/knowledge/01-principles/04-operational-principles.md +275 -0
  9. package/knowledge/01-principles/05-domain-principles.md +539 -0
  10. package/knowledge/01-principles/06-methodology-principles.md +281 -0
  11. package/knowledge/01-principles/07-cognitive-principles.md +277 -0
  12. package/knowledge/01-principles/08-auto-fix-principles.md +320 -0
  13. package/knowledge/01-principles/09-constitution.md +220 -0
  14. package/knowledge/{principles/evolution.md → 01-principles/10-evolution-mechanism.md} +160 -14
  15. package/knowledge/01-principles/README.en.md +385 -0
  16. package/knowledge/01-principles/README.md +385 -0
  17. package/knowledge/{process/overview.md → 02-process/00-overview.md} +90 -5
  18. package/knowledge/02-process/README.en.md +143 -0
  19. package/knowledge/02-process/README.md +186 -0
  20. package/knowledge/{guides/support/pipeline-protocol.md → 03-guides/00-pipeline-protocol.md} +10 -10
  21. package/knowledge/{guides/support/team-orchestrator.md → 03-guides/01-team-orchestrator.md} +53 -8
  22. package/knowledge/{guides/stages/requirement-analyzer.md → 03-guides/02-analyze-requirement.md} +3 -3
  23. package/knowledge/{guides/stages/ai-effect-evaluator.md → 03-guides/08-evaluate-ai-effect.md} +14 -7
  24. package/knowledge/{guides/support/skill-distiller.md → 03-guides/19-distill-skill.md} +3 -3
  25. package/knowledge/{guides/support/skill-updater.md → 03-guides/20-update-skill.md} +1 -1
  26. package/knowledge/{guides/support/methodology-extractor.md → 03-guides/22-extract-methodology.md} +2 -2
  27. package/knowledge/{guides/support/complexity-assessor.md → 03-guides/24-assess-complexity.md} +6 -4
  28. package/knowledge/{guides/support/tech-stack-analyzer.md → 03-guides/26-analyze-tech-stack.md} +1 -1
  29. package/knowledge/{guides/domain-driven-design.md → 03-guides/42-apply-ddd.md} +1 -1
  30. package/knowledge/{process/ai-sdlc.md → 03-guides/43-run-ai-sdlc.md} +1 -1
  31. package/knowledge/{guides/knowledge-management.md → 03-guides/44-manage-knowledge.md} +4 -4
  32. package/knowledge/03-guides/README.en.md +212 -0
  33. package/knowledge/03-guides/README.md +212 -0
  34. package/knowledge/{checklists/requirement.md → 04-checklists/00-requirement.md} +1 -1
  35. package/knowledge/{checklists/design.md → 04-checklists/01-design.md} +1 -1
  36. package/knowledge/{checklists/code.md → 04-checklists/02-code.md} +16 -1
  37. package/knowledge/{checklists/release.md → 04-checklists/04-release.md} +1 -1
  38. package/knowledge/04-checklists/README.en.md +119 -0
  39. package/knowledge/04-checklists/README.md +123 -0
  40. package/knowledge/{config/validation-patterns.yaml → 05-config/00-validation-patterns.yaml} +1 -1
  41. package/knowledge/{config/team-tasks.yaml → 05-config/02-team-tasks.yaml} +2 -2
  42. package/knowledge/05-config/03-role-composition.yaml +346 -0
  43. package/knowledge/{config/skill-compositions.yaml → 05-config/05-skill-compositions.yaml} +24 -24
  44. package/knowledge/05-config/README.en.md +54 -0
  45. package/knowledge/05-config/README.md +132 -0
  46. package/knowledge/06-environment/00-template-registry.md +310 -0
  47. package/knowledge/06-environment/01-detection-patterns.yaml +1692 -0
  48. package/knowledge/{environment → 06-environment}/README.en.md +4 -0
  49. package/knowledge/{environment → 06-environment}/README.md +66 -25
  50. package/knowledge/{standards/coding-style.md → 07-standards/00-coding-style.md} +123 -4
  51. package/knowledge/{standards/code-review.md → 07-standards/01-code-review.md} +3 -3
  52. package/knowledge/{standards/data-consistency.md → 07-standards/02-data-consistency.md} +1 -1
  53. package/knowledge/{standards/document-versioning.md → 07-standards/03-document-versioning.md} +1 -1
  54. package/knowledge/{standards/risk-detection.md → 07-standards/04-risk-detection.md} +5 -5
  55. package/knowledge/07-standards/README.en.md +119 -0
  56. package/knowledge/07-standards/README.md +123 -0
  57. package/knowledge/08-organization/00-vision-mission.md +113 -0
  58. package/knowledge/{organization/ai-native-team.md → 08-organization/01-ai-native-culture.md} +1 -1
  59. package/knowledge/{organization/team-metrics.md → 08-organization/02-team-metrics.md} +1 -1
  60. package/knowledge/08-organization/03-committee-structure.md +54 -0
  61. package/knowledge/08-organization/04-governance-metrics.md +55 -0
  62. package/knowledge/08-organization/05-improvement-process.md +71 -0
  63. package/knowledge/08-organization/README.en.md +165 -0
  64. package/knowledge/08-organization/README.md +165 -0
  65. package/knowledge/09-templates/00-requirement-proposal.md +344 -0
  66. package/knowledge/09-templates/01-architecture-design.md +494 -0
  67. package/knowledge/09-templates/02-api-design.md +408 -0
  68. package/knowledge/09-templates/03-database-design.md +313 -0
  69. package/knowledge/09-templates/04-product-design.md +237 -0
  70. package/knowledge/09-templates/05-domain-business.md +388 -0
  71. package/knowledge/09-templates/06-test-design.md +268 -0
  72. package/knowledge/09-templates/07-evaluation-design.md +372 -0
  73. package/knowledge/09-templates/08-component-knowledge.md +272 -0
  74. package/knowledge/09-templates/09-best-practices.md +218 -0
  75. package/knowledge/{environment/middleware-knowledge.md → 09-templates/10-middleware-knowledge.md} +106 -1
  76. package/knowledge/09-templates/README.en.md +222 -0
  77. package/knowledge/09-templates/README.md +216 -0
  78. package/knowledge/README.en.md +372 -0
  79. package/knowledge/README.md +354 -99
  80. package/package.json +1 -1
  81. package/plugins/.claude-plugin/plugin.json +460 -81
  82. package/plugins/agents/roles/ceo.md +1 -1
  83. package/plugins/agents/roles/product-owner.md +1 -1
  84. package/plugins/agents/roles/tech-lead.md +1 -1
  85. package/plugins/agents/support/consistency-checker.md +36 -3
  86. package/plugins/agents/support/monitoring-agent.md +215 -0
  87. package/plugins/agents/support/safety-auditor.md +2 -2
  88. package/plugins/agents/support/stage-gate-evaluator.md +95 -11
  89. package/plugins/agents/support/test-coverage-reviewer.md +1 -1
  90. package/plugins/benchmarks/templates/README.md +165 -13
  91. package/plugins/benchmarks/templates/commands/apply-template.yaml +108 -0
  92. package/plugins/benchmarks/templates/commands/archive-template.yaml +65 -0
  93. package/plugins/benchmarks/templates/commands/env-export-template.yaml +64 -0
  94. package/plugins/benchmarks/templates/commands/env-sync-template.yaml +104 -0
  95. package/plugins/benchmarks/templates/commands/env-template-template.yaml +96 -0
  96. package/plugins/benchmarks/templates/commands/env-template.yaml +58 -0
  97. package/plugins/benchmarks/templates/commands/env-update-template.yaml +110 -0
  98. package/plugins/benchmarks/templates/commands/env-validate-template.yaml +95 -0
  99. package/plugins/benchmarks/templates/commands/field-evolve-template.yaml +104 -0
  100. package/plugins/benchmarks/templates/commands/project-evolve-template.yaml +104 -0
  101. package/plugins/benchmarks/templates/commands/propose-template.yaml +88 -0
  102. package/plugins/benchmarks/templates/commands/review-template.yaml +124 -0
  103. package/plugins/benchmarks/templates/commands/run-template.yaml +127 -0
  104. package/plugins/benchmarks/templates/commands/test-template.yaml +149 -0
  105. package/plugins/benchmarks/templates/pipeline/experiment-template.yaml +92 -0
  106. package/plugins/benchmarks/templates/pipeline/hotfix-template.yaml +81 -0
  107. package/plugins/benchmarks/templates/skills/agile-iteration-template.yaml +78 -0
  108. package/plugins/benchmarks/templates/skills/benchmark-executor-template.yaml +114 -0
  109. package/plugins/benchmarks/templates/skills/benchmark-generator-template.yaml +52 -0
  110. package/plugins/benchmarks/templates/skills/delivery-stage-template.yaml +130 -0
  111. package/plugins/benchmarks/templates/skills/design-stage-template.yaml +131 -0
  112. package/plugins/benchmarks/templates/skills/experiment-iteration-template.yaml +60 -0
  113. package/plugins/benchmarks/templates/skills/exploration-phase-template.yaml +114 -0
  114. package/plugins/benchmarks/templates/skills/field-evolve-analyzer-template.yaml +51 -0
  115. package/plugins/benchmarks/templates/skills/field-evolve-distiller-template.yaml +34 -0
  116. package/plugins/benchmarks/templates/skills/field-evolve-executor-template.yaml +50 -0
  117. package/plugins/benchmarks/templates/skills/field-evolve-fixer-template.yaml +52 -0
  118. package/plugins/benchmarks/templates/skills/field-evolve-learner-template.yaml +33 -0
  119. package/plugins/benchmarks/templates/skills/field-evolve-scanner-template.yaml +74 -0
  120. package/plugins/benchmarks/templates/skills/field-evolve-template.yaml +71 -0
  121. package/plugins/benchmarks/templates/skills/field-evolve-verifier-template.yaml +51 -0
  122. package/plugins/benchmarks/templates/skills/hotfix-iteration-template.yaml +54 -0
  123. package/plugins/benchmarks/templates/skills/implementation-stage-template.yaml +127 -0
  124. package/plugins/benchmarks/templates/skills/layer1-validation-template.yaml +121 -0
  125. package/plugins/benchmarks/templates/skills/project-evolve-analyzer-template.yaml +51 -0
  126. package/plugins/benchmarks/templates/skills/project-evolve-fixer-template.yaml +52 -0
  127. package/plugins/benchmarks/templates/skills/project-evolve-generator-template.yaml +34 -0
  128. package/plugins/benchmarks/templates/skills/project-evolve-learner-template.yaml +50 -0
  129. package/plugins/benchmarks/templates/skills/project-evolve-reviewer-template.yaml +50 -0
  130. package/plugins/benchmarks/templates/skills/project-evolve-scanner-template.yaml +75 -0
  131. package/plugins/benchmarks/templates/skills/project-evolve-template.yaml +72 -0
  132. package/plugins/benchmarks/templates/skills/project-evolve-verifier-template.yaml +51 -0
  133. package/plugins/benchmarks/templates/skills/skill-forge-template.yaml +117 -0
  134. package/plugins/benchmarks/templates/skills/startup-guard-template.yaml +103 -0
  135. package/plugins/benchmarks/templates/skills/testing-stage-template.yaml +146 -0
  136. package/plugins/benchmarks/templates/skills/waterfall-iteration-template.yaml +55 -0
  137. package/plugins/commands/README.en.md +2 -2
  138. package/plugins/commands/README.md +2 -2
  139. package/plugins/commands/apply.md +102 -16
  140. package/plugins/commands/archive.md +60 -4
  141. package/plugins/commands/env-sync.md +1047 -406
  142. package/plugins/commands/env-template.md +11 -135
  143. package/plugins/commands/env-update.md +1 -1
  144. package/plugins/commands/env-validate.md +3 -3
  145. package/plugins/commands/explore.md +118 -1
  146. package/plugins/commands/field-evolve.md +51 -175
  147. package/plugins/commands/project-evolve.md +167 -68
  148. package/plugins/commands/propose.md +97 -6
  149. package/plugins/commands/review.md +5 -5
  150. package/plugins/commands/run.md +841 -13
  151. package/plugins/commands/status.md +138 -17
  152. package/plugins/commands/test.md +389 -0
  153. package/plugins/hooks/constitution-guard.js +1 -1
  154. package/plugins/hooks/environment-autocommit.js +366 -24
  155. package/plugins/hooks/environment-manager.js +3 -2
  156. package/plugins/hooks/execution-tracker.js +109 -4
  157. package/plugins/hooks/layer1-validator.js +117 -1
  158. package/plugins/hooks/lib/auto-fix-loop.js +605 -0
  159. package/plugins/hooks/lib/environment-config-loader.js +11 -7
  160. package/plugins/hooks/lib/hook-state-manager.js +98 -0
  161. package/plugins/hooks/lib/memory-extractor.js +27 -5
  162. package/plugins/hooks/lib/memory-manager.js +1 -1
  163. package/plugins/hooks/lib/test-auto-fix.test.js +194 -0
  164. package/plugins/hooks/monitoring-trigger.js +467 -0
  165. package/plugins/skills/README.en.md +15 -3
  166. package/plugins/skills/README.md +21 -11
  167. package/plugins/skills/agile-iteration/SKILL.md +187 -0
  168. package/plugins/skills/delivery-stage/SKILL.md +133 -12
  169. package/plugins/skills/design-stage/SKILL.md +103 -12
  170. package/plugins/skills/experiment-evaluator/SKILL.md +271 -0
  171. package/plugins/skills/experiment-iteration/SKILL.md +154 -0
  172. package/plugins/skills/exploration-phase/SKILL.md +93 -10
  173. package/plugins/skills/field-evolve-analyzer/SKILL.md +65 -0
  174. package/plugins/skills/field-evolve-distiller/SKILL.md +66 -0
  175. package/plugins/skills/field-evolve-executor/SKILL.md +94 -0
  176. package/plugins/skills/field-evolve-executor/executor.js +342 -0
  177. package/plugins/skills/field-evolve-fixer/SKILL.md +69 -0
  178. package/plugins/skills/field-evolve-learner/SKILL.md +65 -0
  179. package/plugins/skills/field-evolve-scanner/SKILL.md +87 -0
  180. package/plugins/skills/field-evolve-scanner/scripts/fallback-scanner.js +288 -0
  181. package/plugins/skills/field-evolve-verifier/SKILL.md +64 -0
  182. package/plugins/skills/hotfix-iteration/SKILL.md +279 -0
  183. package/plugins/skills/implementation-stage/SKILL.md +156 -15
  184. package/plugins/skills/layer1-validation/SKILL.md +1 -1
  185. package/plugins/skills/pending-dashboard/SKILL.md +9 -8
  186. package/plugins/skills/project-evolve-analyzer/SKILL.md +95 -0
  187. package/plugins/skills/project-evolve-fixer/SKILL.md +99 -0
  188. package/plugins/skills/project-evolve-generator/SKILL.md +149 -0
  189. package/plugins/skills/project-evolve-learner/SKILL.md +103 -0
  190. package/plugins/skills/project-evolve-reviewer/SKILL.md +104 -0
  191. package/plugins/skills/project-evolve-scanner/SKILL.md +95 -0
  192. package/plugins/skills/project-evolve-scanner/scripts/dependency-reuse-checker.js +395 -0
  193. package/plugins/skills/project-evolve-scanner/scripts/subsystem-coverage.js +315 -0
  194. package/plugins/skills/project-evolve-verifier/SKILL.md +105 -0
  195. package/plugins/skills/requirement-stage/SKILL.md +47 -13
  196. package/plugins/skills/skill-forge/SKILL.md +2 -2
  197. package/plugins/skills/testing-stage/SKILL.md +583 -8
  198. package/plugins/skills/waterfall-iteration/SKILL.md +115 -0
  199. package/scripts/cli/index.js +1 -1
  200. package/scripts/cli/init.js +30 -4
  201. package/scripts/cli/list.js +3 -2
  202. package/scripts/config/commands.config.js +8 -8
  203. package/scripts/config/hooks.config.js +1 -1
  204. package/scripts/install/constants.js +204 -165
  205. package/scripts/state.js +210 -1
  206. package/knowledge/config/README.en.md +0 -44
  207. package/knowledge/config/README.md +0 -44
  208. package/knowledge/config/role-composition.yaml +0 -98
  209. package/knowledge/config/team-triggers.yaml +0 -198
  210. package/knowledge/domain/README.md +0 -115
  211. package/knowledge/domain/flows/README.md +0 -194
  212. package/knowledge/domain/glossary.md +0 -143
  213. package/knowledge/domain/rules.md +0 -138
  214. package/knowledge/environment/component-knowledge.md +0 -316
  215. package/knowledge/environment/detection-patterns.yaml +0 -502
  216. package/knowledge/environment/template-registry.md +0 -321
  217. package/knowledge/guides/requirement-engineering.md +0 -329
  218. package/knowledge/guides/system-design.md +0 -352
  219. package/knowledge/principles/constitution.md +0 -134
  220. package/knowledge/principles/core-principles.md +0 -368
  221. package/knowledge/principles/design-philosophy.md +0 -877
  222. package/knowledge/process/README.en.md +0 -38
  223. package/knowledge/process/README.md +0 -48
  224. package/knowledge/templates/ai-evaluation.md +0 -150
  225. package/knowledge/templates/api-design.md +0 -117
  226. package/knowledge/templates/database-design.md +0 -132
  227. package/knowledge/templates/domain-driven-design.md +0 -321
  228. package/knowledge/templates/product-proposal.md +0 -201
  229. package/knowledge/templates/system-design.md +0 -227
  230. package/knowledge/templates/task-breakdown.md +0 -107
  231. package/knowledge/templates/test-case.md +0 -170
  232. package/plugins/commands/validate.md +0 -108
  233. package/plugins/skills/benchmark-executor/README.md +0 -93
  234. package/plugins/skills/evolution-process/SKILL.md +0 -291
  235. package/plugins/skills/project-evolution/SKILL.md +0 -847
  236. package/scripts/evolution/evolution-router.js +0 -273
  237. package/scripts/evolution/evolution-signal-collector.js +0 -307
  238. package/scripts/evolution/knowledge-loader.js +0 -346
  239. package/scripts/evolution/marketplace.js +0 -317
  240. package/scripts/evolution/version-manager.js +0 -371
  241. /package/knowledge/{process → 02-process}/01-requirement.md +0 -0
  242. /package/knowledge/{process → 02-process}/02-design.md +0 -0
  243. /package/knowledge/{process → 02-process}/03-implementation.md +0 -0
  244. /package/knowledge/{process → 02-process}/04-review.md +0 -0
  245. /package/knowledge/{process → 02-process}/05-testing.md +0 -0
  246. /package/knowledge/{process → 02-process}/06-delivery.md +0 -0
  247. /package/knowledge/{guides/stages/design-planner.md → 03-guides/03-design-solution.md} +0 -0
  248. /package/knowledge/{guides/stages/code-implementer.md → 03-guides/04-implement-code.md} +0 -0
  249. /package/knowledge/{guides/stages/test-planner.md → 03-guides/05-plan-testing.md} +0 -0
  250. /package/knowledge/{guides/stages/test-generator.md → 03-guides/06-generate-tests.md} +0 -0
  251. /package/knowledge/{guides/stages/release-checker.md → 03-guides/07-check-release.md} +0 -0
  252. /package/knowledge/{guides/stages/requirement-reviewer.md → 03-guides/09-review-requirement.md} +0 -0
  253. /package/knowledge/{guides/stages/design-reviewer.md → 03-guides/10-review-design.md} +0 -0
  254. /package/knowledge/{guides/stages/code-reviewer.md → 03-guides/11-review-code.md} +0 -0
  255. /package/knowledge/{guides/stages/test-reviewer.md → 03-guides/12-review-testing.md} +0 -0
  256. /package/knowledge/{guides/stages/security-reviewer.md → 03-guides/13-audit-security.md} +0 -0
  257. /package/knowledge/{guides/stages/consistency-checker.md → 03-guides/14-check-consistency.md} +0 -0
  258. /package/knowledge/{guides/stages/unit-test-runner.md → 03-guides/15-run-unit-tests.md} +0 -0
  259. /package/knowledge/{guides/stages/integration-test-runner.md → 03-guides/16-run-integration-tests.md} +0 -0
  260. /package/knowledge/{guides/stages/test-context-analyzer.md → 03-guides/17-analyze-test-context.md} +0 -0
  261. /package/knowledge/{guides/support/practice-logger.md → 03-guides/18-log-practice.md} +0 -0
  262. /package/knowledge/{guides/support/skill-validator.md → 03-guides/21-validate-skill.md} +0 -0
  263. /package/knowledge/{guides/support/scope-inference.md → 03-guides/23-infer-scope.md} +0 -0
  264. /package/knowledge/{guides/support/component-discovery.md → 03-guides/25-discover-component.md} +0 -0
  265. /package/knowledge/{guides/support/environment-scanner.md → 03-guides/27-scan-environment.md} +0 -0
  266. /package/knowledge/{guides/support/environment-validator.md → 03-guides/28-validate-environment.md} +0 -0
  267. /package/knowledge/{guides/support/knowledge-generator.md → 03-guides/29-generate-knowledge.md} +0 -0
  268. /package/knowledge/{guides/support/ai-capability-analyzer.md → 03-guides/30-analyze-ai-capability.md} +0 -0
  269. /package/knowledge/{guides/support/ai-component-analyzer.md → 03-guides/31-analyze-ai-component.md} +0 -0
  270. /package/knowledge/{guides/support/ai-agent-analyzer.md → 03-guides/32-analyze-ai-agent.md} +0 -0
  271. /package/knowledge/{guides/support/ai-rag-analyzer.md → 03-guides/33-analyze-ai-rag.md} +0 -0
  272. /package/knowledge/{guides/support/ai-task-assessor.md → 03-guides/34-assess-ai-task.md} +0 -0
  273. /package/knowledge/{guides/support/ai-pipeline-evaluator.md → 03-guides/35-evaluate-ai-pipeline.md} +0 -0
  274. /package/knowledge/{guides/support/ai-artifact-evaluator.md → 03-guides/36-evaluate-ai-artifact.md} +0 -0
  275. /package/knowledge/{guides/support/ai-evaluation-planner.md → 03-guides/37-plan-ai-evaluation.md} +0 -0
  276. /package/knowledge/{guides/support/ai-path-evaluator.md → 03-guides/38-evaluate-ai-path.md} +0 -0
  277. /package/knowledge/{guides/support/ai-data-validator.md → 03-guides/39-validate-ai-data.md} +0 -0
  278. /package/knowledge/{guides/support/ai-anomaly-analyzer.md → 03-guides/40-detect-ai-anomaly.md} +0 -0
  279. /package/knowledge/{guides/support/ai-test-diagnostics.md → 03-guides/41-diagnose-ai-test.md} +0 -0
  280. /package/knowledge/{guides/support/test-runner.md → 03-guides/45-test-runner.md} +0 -0
  281. /package/knowledge/{checklists/test.md → 04-checklists/03-test.md} +0 -0
  282. /package/knowledge/{config/team-stage.yaml → 05-config/01-team-stage.yaml} +0 -0
  283. /package/knowledge/{config/role-extensions.yaml → 05-config/04-role-extensions.yaml} +0 -0
@@ -0,0 +1,60 @@
1
+ # AutoSpec Skill Benchmark Template - Experiment-Iteration
2
+ # 适用于: 测试 experiment-iteration skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-experiment-iteration"
7
+ description: "Experiment-Iteration Skill 基准测试 - 实验模式验证流程"
8
+
9
+ type: skill
10
+ target: experiment-iteration
11
+
12
+ testCases:
13
+ - name: "simple-experiment"
14
+ input:
15
+ context: "验证 {technology} 用于 {use-case} 的可行性"
16
+ complexity: 3
17
+ expectedBehaviors:
18
+ - "定义假设"
19
+ - "设计实验"
20
+ - "实现原型"
21
+ - "执行评测"
22
+ - "验证假设"
23
+ expectedOutput:
24
+ - "hypothesis.md"
25
+ - "prototype 代码"
26
+ - "evaluation-report.md"
27
+ - "conclusion.md"
28
+ successCriteria:
29
+ - "假设定义清晰"
30
+ - "评测执行完整"
31
+ - "结论明确"
32
+ qualityMetrics:
33
+ - "假设验证完整率 = 100%"
34
+ - "评测执行率 = 100%"
35
+ maxDuration: 1800
36
+
37
+ - name: "ai-experiment"
38
+ input:
39
+ context: "验证 AI 模型效果"
40
+ complexity: 5
41
+ expectedBehaviors:
42
+ - "定义效果指标"
43
+ - "构建评测数据集"
44
+ - "训练/微调模型"
45
+ - "执行效果评测"
46
+ expectedOutput:
47
+ - "evaluation-plan.md"
48
+ - "dataset.jsonl"
49
+ - "evaluation-report.md"
50
+ successCriteria:
51
+ - "效果指标可测量"
52
+ - "评测数据集有代表性"
53
+ qualityMetrics:
54
+ - "指标完整率 >= 90%"
55
+ - "数据集覆盖率 >= 80%"
56
+ maxDuration: 3600
57
+
58
+ successCriteria:
59
+ passRate: 80
60
+ avgFieldCompletion: 85
@@ -0,0 +1,114 @@
1
+ # AutoSpec Skill Benchmark Template - Exploration-Phase
2
+ # 适用于: 测试 exploration-phase skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-exploration-phase"
7
+ description: "Exploration-Phase Skill 基准测试"
8
+
9
+ type: skill
10
+ target: exploration-phase
11
+
12
+ testCases:
13
+ - name: "simple-exploration"
14
+ input:
15
+ context: "实现 {feature-name} 功能"
16
+ complexity: 1
17
+ expectedBehaviors:
18
+ - "执行复杂度评估(simple)"
19
+ - "启动 2 个 Agent(CEO + 产品负责人)"
20
+ - "执行 2-3 轮澄清"
21
+ - "每轮最多 5 个问题"
22
+ expectedOutput:
23
+ - "clarifications.md"
24
+ - "requirement.md"
25
+ - "复杂度评估结果"
26
+ successCriteria:
27
+ - "澄清轮次 2-3 轮"
28
+ - "问题数 <= 5 个/轮"
29
+ - "需求文档结构完整"
30
+ qualityMetrics:
31
+ - "字段完整率 >= 90%"
32
+ - "澄清问题质量 >= 80%"
33
+ maxDuration: 600
34
+
35
+ - name: "medium-exploration"
36
+ input:
37
+ context: "实现 {feature} 功能,包含多个模块"
38
+ complexity: 3
39
+ expectedBehaviors:
40
+ - "执行复杂度评估(medium)"
41
+ - "启动 3 个 Agent(CEO + 产品 + 技术)"
42
+ - "执行 3-4 轮澄清"
43
+ - "识别技术风险"
44
+ expectedOutput:
45
+ - "clarifications.md"
46
+ - "requirement.md"
47
+ - "技术风险评估"
48
+ successCriteria:
49
+ - "澄清轮次 3-4 轮"
50
+ - "技术风险识别完整"
51
+ qualityMetrics:
52
+ - "风险识别率 >= 90%"
53
+ maxDuration: 900
54
+
55
+ - name: "complex-exploration"
56
+ input:
57
+ context: "实现 {feature},包含 AI 模型、后端 API、前端界面"
58
+ complexity: 5
59
+ expectedBehaviors:
60
+ - "执行复杂度评估(complex)"
61
+ - "启动 4+ 个 Agent"
62
+ - "执行 5-6 轮澄清"
63
+ - "识别多系统边界"
64
+ - "定义系统间契约"
65
+ expectedOutput:
66
+ - "clarifications.md"
67
+ - "requirement.md"
68
+ - "多系统需求分析"
69
+ - "系统间契约定义"
70
+ successCriteria:
71
+ - "多系统识别完整"
72
+ - "系统间契约清晰"
73
+ qualityMetrics:
74
+ - "系统识别率 = 100%"
75
+ - "契约完整率 >= 90%"
76
+ maxDuration: 1200
77
+
78
+ - name: "ambiguous-requirement"
79
+ input:
80
+ context: "做一个好用的后台管理系统"
81
+ complexity: 3
82
+ expectedBehaviors:
83
+ - "识别需求歧义"
84
+ - "执行结构化澄清"
85
+ - "每个问题附 AI 推荐答案"
86
+ - "支持用户提前终止"
87
+ successCriteria:
88
+ - "歧义识别完整"
89
+ - "澄清问题 <= 5 个"
90
+ - "每个问题有推荐答案"
91
+ qualityMetrics:
92
+ - "歧义识别率 >= 90%"
93
+ - "推荐答案合理率 >= 90%"
94
+ maxDuration: 600
95
+
96
+ - name: "early-termination"
97
+ input:
98
+ context: "实现 {feature} 功能"
99
+ earlyTermination: true
100
+ complexity: 1
101
+ expectedBehaviors:
102
+ - "执行第 1 轮澄清"
103
+ - "正确识别用户终止意图"
104
+ - "提前终止澄清流程"
105
+ successCriteria:
106
+ - "终止识别准确"
107
+ - "需求文档完整"
108
+ qualityMetrics:
109
+ - "终止识别准确率 = 100%"
110
+ maxDuration: 300
111
+
112
+ successCriteria:
113
+ passRate: 85
114
+ avgFieldCompletion: 90
@@ -0,0 +1,51 @@
1
+ # AutoSpec Skill Benchmark Template - Field-Evolve-Analyzer
2
+ # 适用于: 测试 field-evolve-analyzer skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-field-evolve-analyzer"
7
+ description: "Field-Evolve-Analyzer Skill 基准测试"
8
+
9
+ type: skill
10
+ target: field-evolve-analyzer
11
+
12
+ testCases:
13
+ - name: "problem-analysis"
14
+ input:
15
+ context: "分析测试失败的问题"
16
+ complexity: 3
17
+ expectedBehaviors:
18
+ - "读取测试结果"
19
+ - "识别失败模式"
20
+ - "分析根因"
21
+ - "评估影响范围"
22
+ expectedOutput:
23
+ - "问题分析报告"
24
+ - "根因分析"
25
+ - "影响评估"
26
+ successCriteria:
27
+ - "失败模式识别准确"
28
+ - "根因分析深入"
29
+ qualityMetrics:
30
+ - "根因识别率 >= 85%"
31
+ maxDuration: 300
32
+
33
+ - name: "priority-calculation"
34
+ input:
35
+ context: "计算问题修复优先级"
36
+ complexity: 1
37
+ expectedBehaviors:
38
+ - "评估影响程度"
39
+ - "评估修复难度"
40
+ - "计算优先级分数"
41
+ expectedOutput:
42
+ - "优先级排序列表"
43
+ successCriteria:
44
+ - "优先级计算合理"
45
+ qualityMetrics:
46
+ - "优先级合理性 >= 90%"
47
+ maxDuration: 120
48
+
49
+ successCriteria:
50
+ passRate: 85
51
+ avgFieldCompletion: 90
@@ -0,0 +1,34 @@
1
+ # AutoSpec Skill Benchmark Template - Field-Evolve-Distiller
2
+ # 适用于: 测试 field-evolve-distiller skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-field-evolve-distiller"
7
+ description: "Field-Evolve-Distiller Skill 基准测试"
8
+
9
+ type: skill
10
+ target: field-evolve-distiller
11
+
12
+ testCases:
13
+ - name: "skill-distillation"
14
+ input:
15
+ context: "从实践日志提炼技能"
16
+ complexity: 3
17
+ expectedBehaviors:
18
+ - "读取进化规则"
19
+ - "收集 practice-log"
20
+ - "执行技能蒸馏"
21
+ - "验证进化效果"
22
+ expectedOutput:
23
+ - "distill-report.md"
24
+ - "distilled-skills/"
25
+ successCriteria:
26
+ - "practice-log 完整读取"
27
+ - "技能提炼符合规范"
28
+ qualityMetrics:
29
+ - "技能规范率 >= 90%"
30
+ maxDuration: 600
31
+
32
+ successCriteria:
33
+ passRate: 85
34
+ avgFieldCompletion: 90
@@ -0,0 +1,50 @@
1
+ # AutoSpec Skill Benchmark Template - Field-Evolve-Executor
2
+ # 适用于: 测试 field-evolve-executor skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-field-evolve-executor"
7
+ description: "Field-Evolve-Executor Skill 基准测试"
8
+
9
+ type: skill
10
+ target: field-evolve-executor
11
+
12
+ testCases:
13
+ - name: "benchmark-execution"
14
+ input:
15
+ context: "执行 benchmark 测试"
16
+ complexity: 3
17
+ expectedBehaviors:
18
+ - "读取 benchmark YAML"
19
+ - "创建临时目录"
20
+ - "执行完整流程"
21
+ - "捕获详细指标"
22
+ expectedOutput:
23
+ - "执行结果"
24
+ - "指标数据"
25
+ - "产出物"
26
+ successCriteria:
27
+ - "benchmark 执行完整"
28
+ - "指标捕获准确"
29
+ qualityMetrics:
30
+ - "benchmark 执行率 = 100%"
31
+ maxDuration: 900
32
+
33
+ - name: "isolated-execution"
34
+ input:
35
+ context: "在隔离环境中执行测试"
36
+ complexity: 5
37
+ expectedBehaviors:
38
+ - "创建临时 git worktree"
39
+ - "执行测试"
40
+ - "清理环境"
41
+ successCriteria:
42
+ - "隔离环境创建成功"
43
+ - "清理完整"
44
+ qualityMetrics:
45
+ - "隔离成功率 = 100%"
46
+ maxDuration: 600
47
+
48
+ successCriteria:
49
+ passRate: 85
50
+ avgFieldCompletion: 90
@@ -0,0 +1,52 @@
1
+ # AutoSpec Skill Benchmark Template - Field-Evolve-Fixer
2
+ # 适用于: 测试 field-evolve-fixer skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-field-evolve-fixer"
7
+ description: "Field-Evolve-Fixer Skill 基准测试"
8
+
9
+ type: skill
10
+ target: field-evolve-fixer
11
+
12
+ testCases:
13
+ - name: "auto-fix"
14
+ input:
15
+ context: "自动修复检测到的问题"
16
+ complexity: 3
17
+ expectedBehaviors:
18
+ - "读取问题清单"
19
+ - "分类可自动修复问题"
20
+ - "执行修复"
21
+ - "验证修复效果"
22
+ expectedOutput:
23
+ - "修复记录"
24
+ - "验证报告"
25
+ successCriteria:
26
+ - "修复有效"
27
+ - "无退化发生"
28
+ qualityMetrics:
29
+ - "修复有效率 >= 85%"
30
+ - "退化检出率 = 100%"
31
+ maxDuration: 600
32
+
33
+ - name: "manual-review-required"
34
+ input:
35
+ context: "处理需要人工审查的问题"
36
+ complexity: 1
37
+ expectedBehaviors:
38
+ - "识别需人工审查问题"
39
+ - "生成审查建议"
40
+ - "不自动执行"
41
+ expectedOutput:
42
+ - "审查建议清单"
43
+ successCriteria:
44
+ - "分类正确"
45
+ - "不自动执行"
46
+ qualityMetrics:
47
+ - "分类准确率 = 100%"
48
+ maxDuration: 120
49
+
50
+ successCriteria:
51
+ passRate: 85
52
+ avgFieldCompletion: 90
@@ -0,0 +1,33 @@
1
+ # AutoSpec Skill Benchmark Template - Field-Evolve-Learner
2
+ # 适用于: 测试 field-evolve-learner skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-field-evolve-learner"
7
+ description: "Field-Evolve-Learner Skill 基准测试"
8
+
9
+ type: skill
10
+ target: field-evolve-learner
11
+
12
+ testCases:
13
+ - name: "pattern-learning"
14
+ input:
15
+ context: "从实践日志中学习模式"
16
+ complexity: 3
17
+ expectedBehaviors:
18
+ - "读取 practice-log"
19
+ - "识别重复模式"
20
+ - "提取通用解决方案"
21
+ expectedOutput:
22
+ - "模式识别报告"
23
+ - "通用解决方案"
24
+ successCriteria:
25
+ - "模式识别准确"
26
+ - "解决方案合理"
27
+ qualityMetrics:
28
+ - "模式识别率 >= 80%"
29
+ maxDuration: 300
30
+
31
+ successCriteria:
32
+ passRate: 80
33
+ avgFieldCompletion: 85
@@ -0,0 +1,74 @@
1
+ # AutoSpec Skill Benchmark Template - Field-Evolve-Scanner
2
+ # 适用于: 测试 field-evolve-scanner skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-field-evolve-scanner"
7
+ description: "Field-Evolve-Scanner Skill 基准测试"
8
+
9
+ type: skill
10
+ target: field-evolve-scanner
11
+
12
+ testCases:
13
+ - name: "simple-project-scan"
14
+ input:
15
+ context: "扫描一个简单的 Node.js 项目"
16
+ complexity: 1
17
+ expectedBehaviors:
18
+ - "检测技术栈"
19
+ - "检测项目结构"
20
+ - "识别代码复杂度问题"
21
+ - "输出问题清单"
22
+ expectedOutput:
23
+ - "项目特征报告"
24
+ - "问题清单"
25
+ successCriteria:
26
+ - "技术栈检测正确"
27
+ - "问题识别完整"
28
+ qualityMetrics:
29
+ - "技术栈识别准确率 >= 95%"
30
+ - "问题识别率 >= 80%"
31
+ maxDuration: 300
32
+
33
+ - name: "multi-system-scan"
34
+ input:
35
+ context: "扫描包含前后端的多系统项目"
36
+ complexity: 5
37
+ expectedBehaviors:
38
+ - "检测多个子系统"
39
+ - "识别系统边界"
40
+ - "分析依赖关系"
41
+ - "启用兜底扫描分类文档"
42
+ expectedOutput:
43
+ - "多系统特征报告"
44
+ - "文档分类报告"
45
+ successCriteria:
46
+ - "子系统识别完整"
47
+ - "文档分类准确"
48
+ qualityMetrics:
49
+ - "子系统识别率 = 100%"
50
+ - "文档分类准确率 >= 90%"
51
+ maxDuration: 600
52
+
53
+ - name: "fallback-classification"
54
+ input:
55
+ context: "扫描项目并分类未匹配规则的文档"
56
+ complexity: 3
57
+ expectedBehaviors:
58
+ - "收集所有 Markdown 文件"
59
+ - "排除已规则匹配的文件"
60
+ - "使用 AI 模型分类"
61
+ expectedOutput:
62
+ - "设计文档列表"
63
+ - "业务文档列表"
64
+ - "测试文档列表"
65
+ successCriteria:
66
+ - "分类逻辑正确"
67
+ - "分类结果合理"
68
+ qualityMetrics:
69
+ - "分类准确率 >= 85%"
70
+ maxDuration: 450
71
+
72
+ successCriteria:
73
+ passRate: 85
74
+ avgFieldCompletion: 90
@@ -0,0 +1,71 @@
1
+ # AutoSpec Skill Benchmark Template - Field-Evolve
2
+ # 适用于: 测试 field-evolve skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-field-evolve"
7
+ description: "Field-Evolve Skill 基准测试 - 实战项目自进化"
8
+
9
+ type: skill
10
+ target: field-evolve
11
+
12
+ testCases:
13
+ - name: "quick-validation"
14
+ input:
15
+ context: "快速验证模式"
16
+ complexity: 1
17
+ expectedBehaviors:
18
+ - "读取 config.json"
19
+ - "执行编译检查"
20
+ - "执行单元测试"
21
+ - "执行 Lint 检查"
22
+ expectedOutput:
23
+ - "test-result.json"
24
+ - "quick-report.md"
25
+ successCriteria:
26
+ - "所有检查项执行"
27
+ - "结果记录完整"
28
+ qualityMetrics:
29
+ - "检查执行率 = 100%"
30
+ maxDuration: 300
31
+
32
+ - name: "deep-testing"
33
+ input:
34
+ context: "深度测试模式"
35
+ complexity: 5
36
+ expectedBehaviors:
37
+ - "扫描 benchmarks"
38
+ - "执行 benchmark 测试场景"
39
+ - "计算三维度评分"
40
+ expectedOutput:
41
+ - "deep-report.md"
42
+ - "evaluation.json"
43
+ successCriteria:
44
+ - "benchmark 执行完整"
45
+ - "评分计算正确"
46
+ qualityMetrics:
47
+ - "benchmark 执行率 = 100%"
48
+ maxDuration: 1800
49
+
50
+ - name: "full-cycle"
51
+ input:
52
+ context: "完整循环模式"
53
+ complexity: 5
54
+ expectedBehaviors:
55
+ - "执行深度测试"
56
+ - "生成改进方案"
57
+ - "执行自动修复"
58
+ - "技能蒸馏"
59
+ expectedOutput:
60
+ - "full-report.md"
61
+ - "distilled-skills/"
62
+ successCriteria:
63
+ - "完整循环执行成功"
64
+ - "无退化发生"
65
+ qualityMetrics:
66
+ - "修复有效率 >= 85%"
67
+ maxDuration: 2400
68
+
69
+ successCriteria:
70
+ passRate: 85
71
+ avgFieldCompletion: 90
@@ -0,0 +1,51 @@
1
+ # AutoSpec Skill Benchmark Template - Field-Evolve-Verifier
2
+ # 适用于: 测试 field-evolve-verifier skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-field-evolve-verifier"
7
+ description: "Field-Evolve-Verifier Skill 基准测试"
8
+
9
+ type: skill
10
+ target: field-evolve-verifier
11
+
12
+ testCases:
13
+ - name: "fix-verification"
14
+ input:
15
+ context: "验证修复效果"
16
+ complexity: 3
17
+ expectedBehaviors:
18
+ - "重新执行失败测试"
19
+ - "对比修复前后"
20
+ - "检测退化"
21
+ - "计算质量分数"
22
+ expectedOutput:
23
+ - "验证报告"
24
+ - "质量分数对比"
25
+ successCriteria:
26
+ - "验证执行完整"
27
+ - "退化检测正确"
28
+ qualityMetrics:
29
+ - "验证完整率 = 100%"
30
+ - "退化检出率 = 100%"
31
+ maxDuration: 450
32
+
33
+ - name: "benchmark-comparison"
34
+ input:
35
+ context: "对比 benchmark 结果"
36
+ complexity: 3
37
+ expectedBehaviors:
38
+ - "执行 benchmark"
39
+ - "对比历史结果"
40
+ - "识别退化"
41
+ expectedOutput:
42
+ - "对比报告"
43
+ successCriteria:
44
+ - "对比准确"
45
+ qualityMetrics:
46
+ - "对比准确率 = 100%"
47
+ maxDuration: 600
48
+
49
+ successCriteria:
50
+ passRate: 90
51
+ avgFieldCompletion: 95
@@ -0,0 +1,54 @@
1
+ # AutoSpec Skill Benchmark Template - Hotfix-Iteration
2
+ # 适用于: 测试 hotfix-iteration skill
3
+ # init 后复制到 .autospec/benchmarks/ 后按需修改
4
+
5
+ version: "1.0"
6
+ name: "skill-hotfix-iteration"
7
+ description: "Hotfix-Iteration Skill 基准测试 - 热修复快速流程"
8
+
9
+ type: skill
10
+ target: hotfix-iteration
11
+
12
+ testCases:
13
+ - name: "simple-hotfix"
14
+ input:
15
+ context: "修复生产环境 {bug-description}"
16
+ complexity: 1
17
+ expectedBehaviors:
18
+ - "问题诊断"
19
+ - "风险评估"
20
+ - "快速修复"
21
+ - "快速审查"
22
+ - "快速部署"
23
+ expectedOutput:
24
+ - "hotfix-issue.md"
25
+ - "变更代码"
26
+ - "回归测试"
27
+ successCriteria:
28
+ - "问题定位准确"
29
+ - "变更最小化"
30
+ - "回归测试通过"
31
+ qualityMetrics:
32
+ - "修复耗时 < 15 分钟"
33
+ - "变更行数 < 50"
34
+ maxDuration: 900
35
+
36
+ - name: "critical-hotfix"
37
+ input:
38
+ context: "修复生产环境严重安全漏洞"
39
+ complexity: 3
40
+ expectedBehaviors:
41
+ - "安全评估"
42
+ - "影响范围分析"
43
+ - "紧急修复"
44
+ - "安全审查"
45
+ successCriteria:
46
+ - "安全风险识别完整"
47
+ - "修复方案合理"
48
+ qualityMetrics:
49
+ - "修复耗时 < 30 分钟"
50
+ maxDuration: 1800
51
+
52
+ successCriteria:
53
+ passRate: 95
54
+ avgFieldCompletion: 95