@chongyan/autospec 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.en.md +447 -321
- package/README.md +418 -286
- package/knowledge/01-principles/00-principles-hierarchy.md +247 -0
- package/knowledge/01-principles/01-first-principles.md +241 -0
- package/knowledge/01-principles/02-strategic-principles.md +286 -0
- package/knowledge/01-principles/03-tactical-principles.md +385 -0
- package/knowledge/01-principles/04-operational-principles.md +275 -0
- package/knowledge/01-principles/05-domain-principles.md +539 -0
- package/knowledge/01-principles/06-methodology-principles.md +281 -0
- package/knowledge/01-principles/07-cognitive-principles.md +277 -0
- package/knowledge/01-principles/08-auto-fix-principles.md +320 -0
- package/knowledge/01-principles/09-constitution.md +220 -0
- package/knowledge/{principles/evolution.md → 01-principles/10-evolution-mechanism.md} +160 -14
- package/knowledge/01-principles/README.en.md +385 -0
- package/knowledge/01-principles/README.md +385 -0
- package/knowledge/{process/overview.md → 02-process/00-overview.md} +90 -5
- package/knowledge/02-process/README.en.md +143 -0
- package/knowledge/02-process/README.md +186 -0
- package/knowledge/{guides/support/pipeline-protocol.md → 03-guides/00-pipeline-protocol.md} +10 -10
- package/knowledge/{guides/support/team-orchestrator.md → 03-guides/01-team-orchestrator.md} +53 -8
- package/knowledge/{guides/stages/requirement-analyzer.md → 03-guides/02-analyze-requirement.md} +3 -3
- package/knowledge/{guides/stages/ai-effect-evaluator.md → 03-guides/08-evaluate-ai-effect.md} +14 -7
- package/knowledge/{guides/support/skill-distiller.md → 03-guides/19-distill-skill.md} +3 -3
- package/knowledge/{guides/support/skill-updater.md → 03-guides/20-update-skill.md} +1 -1
- package/knowledge/{guides/support/methodology-extractor.md → 03-guides/22-extract-methodology.md} +2 -2
- package/knowledge/{guides/support/complexity-assessor.md → 03-guides/24-assess-complexity.md} +6 -4
- package/knowledge/{guides/support/tech-stack-analyzer.md → 03-guides/26-analyze-tech-stack.md} +1 -1
- package/knowledge/{guides/domain-driven-design.md → 03-guides/42-apply-ddd.md} +1 -1
- package/knowledge/{process/ai-sdlc.md → 03-guides/43-run-ai-sdlc.md} +1 -1
- package/knowledge/{guides/knowledge-management.md → 03-guides/44-manage-knowledge.md} +4 -4
- package/knowledge/03-guides/README.en.md +212 -0
- package/knowledge/03-guides/README.md +212 -0
- package/knowledge/{checklists/requirement.md → 04-checklists/00-requirement.md} +1 -1
- package/knowledge/{checklists/design.md → 04-checklists/01-design.md} +1 -1
- package/knowledge/{checklists/code.md → 04-checklists/02-code.md} +16 -1
- package/knowledge/{checklists/release.md → 04-checklists/04-release.md} +1 -1
- package/knowledge/04-checklists/README.en.md +119 -0
- package/knowledge/04-checklists/README.md +123 -0
- package/knowledge/{config/validation-patterns.yaml → 05-config/00-validation-patterns.yaml} +1 -1
- package/knowledge/{config/team-tasks.yaml → 05-config/02-team-tasks.yaml} +2 -2
- package/knowledge/05-config/03-role-composition.yaml +346 -0
- package/knowledge/{config/skill-compositions.yaml → 05-config/05-skill-compositions.yaml} +24 -24
- package/knowledge/05-config/README.en.md +54 -0
- package/knowledge/05-config/README.md +132 -0
- package/knowledge/06-environment/00-template-registry.md +310 -0
- package/knowledge/06-environment/01-detection-patterns.yaml +1692 -0
- package/knowledge/{environment → 06-environment}/README.en.md +4 -0
- package/knowledge/{environment → 06-environment}/README.md +66 -25
- package/knowledge/{standards/coding-style.md → 07-standards/00-coding-style.md} +123 -4
- package/knowledge/{standards/code-review.md → 07-standards/01-code-review.md} +3 -3
- package/knowledge/{standards/data-consistency.md → 07-standards/02-data-consistency.md} +1 -1
- package/knowledge/{standards/document-versioning.md → 07-standards/03-document-versioning.md} +1 -1
- package/knowledge/{standards/risk-detection.md → 07-standards/04-risk-detection.md} +5 -5
- package/knowledge/07-standards/README.en.md +119 -0
- package/knowledge/07-standards/README.md +123 -0
- package/knowledge/08-organization/00-vision-mission.md +113 -0
- package/knowledge/{organization/ai-native-team.md → 08-organization/01-ai-native-culture.md} +1 -1
- package/knowledge/{organization/team-metrics.md → 08-organization/02-team-metrics.md} +1 -1
- package/knowledge/08-organization/03-committee-structure.md +54 -0
- package/knowledge/08-organization/04-governance-metrics.md +55 -0
- package/knowledge/08-organization/05-improvement-process.md +71 -0
- package/knowledge/08-organization/README.en.md +165 -0
- package/knowledge/08-organization/README.md +165 -0
- package/knowledge/09-templates/00-requirement-proposal.md +344 -0
- package/knowledge/09-templates/01-architecture-design.md +494 -0
- package/knowledge/09-templates/02-api-design.md +408 -0
- package/knowledge/09-templates/03-database-design.md +313 -0
- package/knowledge/09-templates/04-product-design.md +237 -0
- package/knowledge/09-templates/05-domain-business.md +388 -0
- package/knowledge/09-templates/06-test-design.md +268 -0
- package/knowledge/09-templates/07-evaluation-design.md +372 -0
- package/knowledge/09-templates/08-component-knowledge.md +272 -0
- package/knowledge/09-templates/09-best-practices.md +218 -0
- package/knowledge/{environment/middleware-knowledge.md → 09-templates/10-middleware-knowledge.md} +106 -1
- package/knowledge/09-templates/README.en.md +222 -0
- package/knowledge/09-templates/README.md +216 -0
- package/knowledge/README.en.md +372 -0
- package/knowledge/README.md +354 -99
- package/package.json +1 -1
- package/plugins/.claude-plugin/plugin.json +460 -81
- package/plugins/agents/roles/ceo.md +1 -1
- package/plugins/agents/roles/product-owner.md +1 -1
- package/plugins/agents/roles/tech-lead.md +1 -1
- package/plugins/agents/support/consistency-checker.md +36 -3
- package/plugins/agents/support/monitoring-agent.md +215 -0
- package/plugins/agents/support/safety-auditor.md +2 -2
- package/plugins/agents/support/stage-gate-evaluator.md +95 -11
- package/plugins/agents/support/test-coverage-reviewer.md +1 -1
- package/plugins/benchmarks/templates/README.md +165 -13
- package/plugins/benchmarks/templates/commands/apply-template.yaml +108 -0
- package/plugins/benchmarks/templates/commands/archive-template.yaml +65 -0
- package/plugins/benchmarks/templates/commands/env-export-template.yaml +64 -0
- package/plugins/benchmarks/templates/commands/env-sync-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/env-template-template.yaml +96 -0
- package/plugins/benchmarks/templates/commands/env-template.yaml +58 -0
- package/plugins/benchmarks/templates/commands/env-update-template.yaml +110 -0
- package/plugins/benchmarks/templates/commands/env-validate-template.yaml +95 -0
- package/plugins/benchmarks/templates/commands/field-evolve-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/project-evolve-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/propose-template.yaml +88 -0
- package/plugins/benchmarks/templates/commands/review-template.yaml +124 -0
- package/plugins/benchmarks/templates/commands/run-template.yaml +127 -0
- package/plugins/benchmarks/templates/commands/test-template.yaml +149 -0
- package/plugins/benchmarks/templates/pipeline/experiment-template.yaml +92 -0
- package/plugins/benchmarks/templates/pipeline/hotfix-template.yaml +81 -0
- package/plugins/benchmarks/templates/skills/agile-iteration-template.yaml +78 -0
- package/plugins/benchmarks/templates/skills/benchmark-executor-template.yaml +114 -0
- package/plugins/benchmarks/templates/skills/benchmark-generator-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/delivery-stage-template.yaml +130 -0
- package/plugins/benchmarks/templates/skills/design-stage-template.yaml +131 -0
- package/plugins/benchmarks/templates/skills/experiment-iteration-template.yaml +60 -0
- package/plugins/benchmarks/templates/skills/exploration-phase-template.yaml +114 -0
- package/plugins/benchmarks/templates/skills/field-evolve-analyzer-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/field-evolve-distiller-template.yaml +34 -0
- package/plugins/benchmarks/templates/skills/field-evolve-executor-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/field-evolve-fixer-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/field-evolve-learner-template.yaml +33 -0
- package/plugins/benchmarks/templates/skills/field-evolve-scanner-template.yaml +74 -0
- package/plugins/benchmarks/templates/skills/field-evolve-template.yaml +71 -0
- package/plugins/benchmarks/templates/skills/field-evolve-verifier-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/hotfix-iteration-template.yaml +54 -0
- package/plugins/benchmarks/templates/skills/implementation-stage-template.yaml +127 -0
- package/plugins/benchmarks/templates/skills/layer1-validation-template.yaml +121 -0
- package/plugins/benchmarks/templates/skills/project-evolve-analyzer-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/project-evolve-fixer-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/project-evolve-generator-template.yaml +34 -0
- package/plugins/benchmarks/templates/skills/project-evolve-learner-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/project-evolve-reviewer-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/project-evolve-scanner-template.yaml +75 -0
- package/plugins/benchmarks/templates/skills/project-evolve-template.yaml +72 -0
- package/plugins/benchmarks/templates/skills/project-evolve-verifier-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/skill-forge-template.yaml +117 -0
- package/plugins/benchmarks/templates/skills/startup-guard-template.yaml +103 -0
- package/plugins/benchmarks/templates/skills/testing-stage-template.yaml +146 -0
- package/plugins/benchmarks/templates/skills/waterfall-iteration-template.yaml +55 -0
- package/plugins/commands/README.en.md +2 -2
- package/plugins/commands/README.md +2 -2
- package/plugins/commands/apply.md +102 -16
- package/plugins/commands/archive.md +60 -4
- package/plugins/commands/env-sync.md +1047 -406
- package/plugins/commands/env-template.md +11 -135
- package/plugins/commands/env-update.md +1 -1
- package/plugins/commands/env-validate.md +3 -3
- package/plugins/commands/explore.md +118 -1
- package/plugins/commands/field-evolve.md +51 -175
- package/plugins/commands/project-evolve.md +167 -68
- package/plugins/commands/propose.md +97 -6
- package/plugins/commands/review.md +5 -5
- package/plugins/commands/run.md +841 -13
- package/plugins/commands/status.md +138 -17
- package/plugins/commands/test.md +389 -0
- package/plugins/hooks/constitution-guard.js +1 -1
- package/plugins/hooks/environment-autocommit.js +366 -24
- package/plugins/hooks/environment-manager.js +3 -2
- package/plugins/hooks/execution-tracker.js +109 -4
- package/plugins/hooks/layer1-validator.js +117 -1
- package/plugins/hooks/lib/auto-fix-loop.js +605 -0
- package/plugins/hooks/lib/environment-config-loader.js +11 -7
- package/plugins/hooks/lib/hook-state-manager.js +98 -0
- package/plugins/hooks/lib/memory-extractor.js +27 -5
- package/plugins/hooks/lib/memory-manager.js +1 -1
- package/plugins/hooks/lib/test-auto-fix.test.js +194 -0
- package/plugins/hooks/monitoring-trigger.js +467 -0
- package/plugins/skills/README.en.md +15 -3
- package/plugins/skills/README.md +21 -11
- package/plugins/skills/agile-iteration/SKILL.md +187 -0
- package/plugins/skills/delivery-stage/SKILL.md +133 -12
- package/plugins/skills/design-stage/SKILL.md +103 -12
- package/plugins/skills/experiment-evaluator/SKILL.md +271 -0
- package/plugins/skills/experiment-iteration/SKILL.md +154 -0
- package/plugins/skills/exploration-phase/SKILL.md +93 -10
- package/plugins/skills/field-evolve-analyzer/SKILL.md +65 -0
- package/plugins/skills/field-evolve-distiller/SKILL.md +66 -0
- package/plugins/skills/field-evolve-executor/SKILL.md +94 -0
- package/plugins/skills/field-evolve-executor/executor.js +342 -0
- package/plugins/skills/field-evolve-fixer/SKILL.md +69 -0
- package/plugins/skills/field-evolve-learner/SKILL.md +65 -0
- package/plugins/skills/field-evolve-scanner/SKILL.md +87 -0
- package/plugins/skills/field-evolve-scanner/scripts/fallback-scanner.js +288 -0
- package/plugins/skills/field-evolve-verifier/SKILL.md +64 -0
- package/plugins/skills/hotfix-iteration/SKILL.md +279 -0
- package/plugins/skills/implementation-stage/SKILL.md +156 -15
- package/plugins/skills/layer1-validation/SKILL.md +1 -1
- package/plugins/skills/pending-dashboard/SKILL.md +9 -8
- package/plugins/skills/project-evolve-analyzer/SKILL.md +95 -0
- package/plugins/skills/project-evolve-fixer/SKILL.md +99 -0
- package/plugins/skills/project-evolve-generator/SKILL.md +149 -0
- package/plugins/skills/project-evolve-learner/SKILL.md +103 -0
- package/plugins/skills/project-evolve-reviewer/SKILL.md +104 -0
- package/plugins/skills/project-evolve-scanner/SKILL.md +95 -0
- package/plugins/skills/project-evolve-scanner/scripts/dependency-reuse-checker.js +395 -0
- package/plugins/skills/project-evolve-scanner/scripts/subsystem-coverage.js +315 -0
- package/plugins/skills/project-evolve-verifier/SKILL.md +105 -0
- package/plugins/skills/requirement-stage/SKILL.md +47 -13
- package/plugins/skills/skill-forge/SKILL.md +2 -2
- package/plugins/skills/testing-stage/SKILL.md +583 -8
- package/plugins/skills/waterfall-iteration/SKILL.md +115 -0
- package/scripts/cli/index.js +1 -1
- package/scripts/cli/init.js +30 -4
- package/scripts/cli/list.js +3 -2
- package/scripts/config/commands.config.js +8 -8
- package/scripts/config/hooks.config.js +1 -1
- package/scripts/install/constants.js +204 -165
- package/scripts/state.js +210 -1
- package/knowledge/config/README.en.md +0 -44
- package/knowledge/config/README.md +0 -44
- package/knowledge/config/role-composition.yaml +0 -98
- package/knowledge/config/team-triggers.yaml +0 -198
- package/knowledge/domain/README.md +0 -115
- package/knowledge/domain/flows/README.md +0 -194
- package/knowledge/domain/glossary.md +0 -143
- package/knowledge/domain/rules.md +0 -138
- package/knowledge/environment/component-knowledge.md +0 -316
- package/knowledge/environment/detection-patterns.yaml +0 -502
- package/knowledge/environment/template-registry.md +0 -321
- package/knowledge/guides/requirement-engineering.md +0 -329
- package/knowledge/guides/system-design.md +0 -352
- package/knowledge/principles/constitution.md +0 -134
- package/knowledge/principles/core-principles.md +0 -368
- package/knowledge/principles/design-philosophy.md +0 -877
- package/knowledge/process/README.en.md +0 -38
- package/knowledge/process/README.md +0 -48
- package/knowledge/templates/ai-evaluation.md +0 -150
- package/knowledge/templates/api-design.md +0 -117
- package/knowledge/templates/database-design.md +0 -132
- package/knowledge/templates/domain-driven-design.md +0 -321
- package/knowledge/templates/product-proposal.md +0 -201
- package/knowledge/templates/system-design.md +0 -227
- package/knowledge/templates/task-breakdown.md +0 -107
- package/knowledge/templates/test-case.md +0 -170
- package/plugins/commands/validate.md +0 -108
- package/plugins/skills/benchmark-executor/README.md +0 -93
- package/plugins/skills/evolution-process/SKILL.md +0 -291
- package/plugins/skills/project-evolution/SKILL.md +0 -847
- package/scripts/evolution/evolution-router.js +0 -273
- package/scripts/evolution/evolution-signal-collector.js +0 -307
- package/scripts/evolution/knowledge-loader.js +0 -346
- package/scripts/evolution/marketplace.js +0 -317
- package/scripts/evolution/version-manager.js +0 -371
- /package/knowledge/{process → 02-process}/01-requirement.md +0 -0
- /package/knowledge/{process → 02-process}/02-design.md +0 -0
- /package/knowledge/{process → 02-process}/03-implementation.md +0 -0
- /package/knowledge/{process → 02-process}/04-review.md +0 -0
- /package/knowledge/{process → 02-process}/05-testing.md +0 -0
- /package/knowledge/{process → 02-process}/06-delivery.md +0 -0
- /package/knowledge/{guides/stages/design-planner.md → 03-guides/03-design-solution.md} +0 -0
- /package/knowledge/{guides/stages/code-implementer.md → 03-guides/04-implement-code.md} +0 -0
- /package/knowledge/{guides/stages/test-planner.md → 03-guides/05-plan-testing.md} +0 -0
- /package/knowledge/{guides/stages/test-generator.md → 03-guides/06-generate-tests.md} +0 -0
- /package/knowledge/{guides/stages/release-checker.md → 03-guides/07-check-release.md} +0 -0
- /package/knowledge/{guides/stages/requirement-reviewer.md → 03-guides/09-review-requirement.md} +0 -0
- /package/knowledge/{guides/stages/design-reviewer.md → 03-guides/10-review-design.md} +0 -0
- /package/knowledge/{guides/stages/code-reviewer.md → 03-guides/11-review-code.md} +0 -0
- /package/knowledge/{guides/stages/test-reviewer.md → 03-guides/12-review-testing.md} +0 -0
- /package/knowledge/{guides/stages/security-reviewer.md → 03-guides/13-audit-security.md} +0 -0
- /package/knowledge/{guides/stages/consistency-checker.md → 03-guides/14-check-consistency.md} +0 -0
- /package/knowledge/{guides/stages/unit-test-runner.md → 03-guides/15-run-unit-tests.md} +0 -0
- /package/knowledge/{guides/stages/integration-test-runner.md → 03-guides/16-run-integration-tests.md} +0 -0
- /package/knowledge/{guides/stages/test-context-analyzer.md → 03-guides/17-analyze-test-context.md} +0 -0
- /package/knowledge/{guides/support/practice-logger.md → 03-guides/18-log-practice.md} +0 -0
- /package/knowledge/{guides/support/skill-validator.md → 03-guides/21-validate-skill.md} +0 -0
- /package/knowledge/{guides/support/scope-inference.md → 03-guides/23-infer-scope.md} +0 -0
- /package/knowledge/{guides/support/component-discovery.md → 03-guides/25-discover-component.md} +0 -0
- /package/knowledge/{guides/support/environment-scanner.md → 03-guides/27-scan-environment.md} +0 -0
- /package/knowledge/{guides/support/environment-validator.md → 03-guides/28-validate-environment.md} +0 -0
- /package/knowledge/{guides/support/knowledge-generator.md → 03-guides/29-generate-knowledge.md} +0 -0
- /package/knowledge/{guides/support/ai-capability-analyzer.md → 03-guides/30-analyze-ai-capability.md} +0 -0
- /package/knowledge/{guides/support/ai-component-analyzer.md → 03-guides/31-analyze-ai-component.md} +0 -0
- /package/knowledge/{guides/support/ai-agent-analyzer.md → 03-guides/32-analyze-ai-agent.md} +0 -0
- /package/knowledge/{guides/support/ai-rag-analyzer.md → 03-guides/33-analyze-ai-rag.md} +0 -0
- /package/knowledge/{guides/support/ai-task-assessor.md → 03-guides/34-assess-ai-task.md} +0 -0
- /package/knowledge/{guides/support/ai-pipeline-evaluator.md → 03-guides/35-evaluate-ai-pipeline.md} +0 -0
- /package/knowledge/{guides/support/ai-artifact-evaluator.md → 03-guides/36-evaluate-ai-artifact.md} +0 -0
- /package/knowledge/{guides/support/ai-evaluation-planner.md → 03-guides/37-plan-ai-evaluation.md} +0 -0
- /package/knowledge/{guides/support/ai-path-evaluator.md → 03-guides/38-evaluate-ai-path.md} +0 -0
- /package/knowledge/{guides/support/ai-data-validator.md → 03-guides/39-validate-ai-data.md} +0 -0
- /package/knowledge/{guides/support/ai-anomaly-analyzer.md → 03-guides/40-detect-ai-anomaly.md} +0 -0
- /package/knowledge/{guides/support/ai-test-diagnostics.md → 03-guides/41-diagnose-ai-test.md} +0 -0
- /package/knowledge/{guides/support/test-runner.md → 03-guides/45-test-runner.md} +0 -0
- /package/knowledge/{checklists/test.md → 04-checklists/03-test.md} +0 -0
- /package/knowledge/{config/team-stage.yaml → 05-config/01-team-stage.yaml} +0 -0
- /package/knowledge/{config/role-extensions.yaml → 05-config/04-role-extensions.yaml} +0 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# AutoSpec Pipeline Benchmark Template - Experiment
|
|
2
|
+
# 适用于: 技术探索 / AI 功能验证
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "experiment-{project-name}"
|
|
7
|
+
description: "实验模式 benchmark"
|
|
8
|
+
|
|
9
|
+
type: pipeline
|
|
10
|
+
scenario:
|
|
11
|
+
name: "技术探索实验"
|
|
12
|
+
input: "验证 {technology} 用于 {use-case} 的可行性"
|
|
13
|
+
complexity: 3
|
|
14
|
+
expectedStages:
|
|
15
|
+
- requirement
|
|
16
|
+
- design
|
|
17
|
+
- implement
|
|
18
|
+
- test
|
|
19
|
+
- deliver
|
|
20
|
+
|
|
21
|
+
stageCriteria:
|
|
22
|
+
requirement:
|
|
23
|
+
expectedArtifacts:
|
|
24
|
+
- "specs/experiment/requirement.md"
|
|
25
|
+
- "specs/experiment/hypothesis.md"
|
|
26
|
+
layer1Check:
|
|
27
|
+
- "实验目标清晰"
|
|
28
|
+
- "假设可验证"
|
|
29
|
+
layer2Check:
|
|
30
|
+
- "成功指标明确"
|
|
31
|
+
maxDuration: 300
|
|
32
|
+
|
|
33
|
+
design:
|
|
34
|
+
expectedArtifacts:
|
|
35
|
+
- "specs/experiment/design.md"
|
|
36
|
+
- "specs/experiment/evaluation.md"
|
|
37
|
+
layer1Check:
|
|
38
|
+
- "实验设计合理"
|
|
39
|
+
- "评测方案可行"
|
|
40
|
+
layer2Check:
|
|
41
|
+
- "备选方案准备"
|
|
42
|
+
maxDuration: 450
|
|
43
|
+
|
|
44
|
+
implement:
|
|
45
|
+
expectedArtifacts:
|
|
46
|
+
- "src/ 原型代码"
|
|
47
|
+
- "experiments/ 实验脚本"
|
|
48
|
+
layer1Check:
|
|
49
|
+
- "原型可运行"
|
|
50
|
+
- "实验脚本可执行"
|
|
51
|
+
layer2Check:
|
|
52
|
+
- "数据收集完整"
|
|
53
|
+
maxDuration: 900
|
|
54
|
+
|
|
55
|
+
test:
|
|
56
|
+
expectedArtifacts:
|
|
57
|
+
- "experiments/results.md"
|
|
58
|
+
layer1Check:
|
|
59
|
+
- "实验执行完成"
|
|
60
|
+
- "结果记录完整"
|
|
61
|
+
layer2Check:
|
|
62
|
+
- "效果评估客观"
|
|
63
|
+
maxDuration: 600
|
|
64
|
+
|
|
65
|
+
deliver:
|
|
66
|
+
expectedArtifacts:
|
|
67
|
+
- "experiments/conclusion.md"
|
|
68
|
+
layer1Check:
|
|
69
|
+
- "结论清晰"
|
|
70
|
+
layer2Check:
|
|
71
|
+
- "知识沉淀完整"
|
|
72
|
+
maxDuration: 300
|
|
73
|
+
|
|
74
|
+
pipelineMetrics:
|
|
75
|
+
expected:
|
|
76
|
+
totalDuration: 2550
|
|
77
|
+
maxRollbacks: 1
|
|
78
|
+
layer1PassRate: 85
|
|
79
|
+
layer2PassRate: 80
|
|
80
|
+
artifactCompletionRate: 90
|
|
81
|
+
successRate: 70
|
|
82
|
+
learningOutcome: 80
|
|
83
|
+
|
|
84
|
+
qualityThresholds:
|
|
85
|
+
pass:
|
|
86
|
+
overallScore: 75
|
|
87
|
+
layer1PassRate: 80
|
|
88
|
+
layer2PassRate: 75
|
|
89
|
+
excellent:
|
|
90
|
+
overallScore: 85
|
|
91
|
+
layer1PassRate: 90
|
|
92
|
+
layer2PassRate: 85
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# AutoSpec Pipeline Benchmark Template - Hotfix
|
|
2
|
+
# 适用于: 紧急 Bug 修复模式
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "hotfix-{project-name}"
|
|
7
|
+
description: "热修复模式 benchmark"
|
|
8
|
+
|
|
9
|
+
type: pipeline
|
|
10
|
+
scenario:
|
|
11
|
+
name: "紧急 Bug 修复"
|
|
12
|
+
input: "修复生产环境 {bug-description} 的紧急 bug"
|
|
13
|
+
complexity: 1
|
|
14
|
+
expectedStages:
|
|
15
|
+
- requirement
|
|
16
|
+
- implement
|
|
17
|
+
- test
|
|
18
|
+
- deliver
|
|
19
|
+
|
|
20
|
+
stageCriteria:
|
|
21
|
+
requirement:
|
|
22
|
+
expectedArtifacts:
|
|
23
|
+
- "specs/hotfix-{id}/bug-report.md"
|
|
24
|
+
layer1Check:
|
|
25
|
+
- "bug 描述清晰"
|
|
26
|
+
- "复现步骤完整"
|
|
27
|
+
layer2Check:
|
|
28
|
+
- "影响范围明确"
|
|
29
|
+
- "快速确认"
|
|
30
|
+
maxDuration: 180
|
|
31
|
+
|
|
32
|
+
implement:
|
|
33
|
+
expectedArtifacts:
|
|
34
|
+
- "最小化修复代码"
|
|
35
|
+
- "回归测试"
|
|
36
|
+
layer1Check:
|
|
37
|
+
- "修复有效"
|
|
38
|
+
- "无新 bug 引入"
|
|
39
|
+
layer2Check:
|
|
40
|
+
- "最小化改动"
|
|
41
|
+
maxDuration: 300
|
|
42
|
+
|
|
43
|
+
test:
|
|
44
|
+
expectedArtifacts:
|
|
45
|
+
- "快速测试报告"
|
|
46
|
+
layer1Check:
|
|
47
|
+
- "bug 验证通过"
|
|
48
|
+
- "关键路径回归通过"
|
|
49
|
+
layer2Check:
|
|
50
|
+
- "测试覆盖合理"
|
|
51
|
+
maxDuration: 180
|
|
52
|
+
|
|
53
|
+
deliver:
|
|
54
|
+
expectedArtifacts:
|
|
55
|
+
- "发布说明"
|
|
56
|
+
- "复盘报告"
|
|
57
|
+
layer1Check:
|
|
58
|
+
- "发布就绪"
|
|
59
|
+
layer2Check:
|
|
60
|
+
- "复盘完整"
|
|
61
|
+
maxDuration: 120
|
|
62
|
+
|
|
63
|
+
pipelineMetrics:
|
|
64
|
+
expected:
|
|
65
|
+
totalDuration: 780
|
|
66
|
+
maxRollbacks: 0
|
|
67
|
+
layer1PassRate: 95
|
|
68
|
+
layer2PassRate: 90
|
|
69
|
+
artifactCompletionRate: 100
|
|
70
|
+
fixTime: 300
|
|
71
|
+
deployTime: 120
|
|
72
|
+
|
|
73
|
+
qualityThresholds:
|
|
74
|
+
pass:
|
|
75
|
+
overallScore: 90
|
|
76
|
+
layer1PassRate: 95
|
|
77
|
+
layer2PassRate: 90
|
|
78
|
+
excellent:
|
|
79
|
+
overallScore: 95
|
|
80
|
+
layer1PassRate: 100
|
|
81
|
+
layer2PassRate: 95
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Agile-Iteration
|
|
2
|
+
# 适用于: 测试 agile-iteration skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-agile-iteration"
|
|
7
|
+
description: "Agile-Iteration Skill 基准测试 - 敏捷故事循环"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: agile-iteration
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "single-story"
|
|
14
|
+
input:
|
|
15
|
+
context: "使用敏捷模式开发单个用户故事"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "加载故事列表"
|
|
19
|
+
- "执行故事设计"
|
|
20
|
+
- "执行故事编码"
|
|
21
|
+
- "执行故事审查"
|
|
22
|
+
- "更新故事状态"
|
|
23
|
+
expectedOutput:
|
|
24
|
+
- "stories/{id}/design.md"
|
|
25
|
+
- "代码变更"
|
|
26
|
+
- "更新后的 stories.md"
|
|
27
|
+
successCriteria:
|
|
28
|
+
- "故事设计完整"
|
|
29
|
+
- "审查通过"
|
|
30
|
+
- "状态更新正确"
|
|
31
|
+
qualityMetrics:
|
|
32
|
+
- "故事完成率 = 100%"
|
|
33
|
+
- "验证通过率 >= 90%"
|
|
34
|
+
maxDuration: 600
|
|
35
|
+
|
|
36
|
+
- name: "multi-story-sequential"
|
|
37
|
+
input:
|
|
38
|
+
context: "使用敏捷模式顺序执行多个用户故事"
|
|
39
|
+
complexity: 3
|
|
40
|
+
expectedBehaviors:
|
|
41
|
+
- "加载故事列表"
|
|
42
|
+
- "按优先级排序"
|
|
43
|
+
- "顺序执行每个故事"
|
|
44
|
+
- "集成测试"
|
|
45
|
+
expectedOutput:
|
|
46
|
+
- "各故事设计文档"
|
|
47
|
+
- "集成测试报告"
|
|
48
|
+
successCriteria:
|
|
49
|
+
- "所有故事完成"
|
|
50
|
+
- "集成测试通过"
|
|
51
|
+
qualityMetrics:
|
|
52
|
+
- "故事完成率 = 100%"
|
|
53
|
+
- "集成测试通过率 >= 90%"
|
|
54
|
+
maxDuration: 1200
|
|
55
|
+
|
|
56
|
+
- name: "parallel-stories"
|
|
57
|
+
input:
|
|
58
|
+
context: "使用敏捷模式并行执行无依赖的用户故事"
|
|
59
|
+
complexity: 5
|
|
60
|
+
expectedBehaviors:
|
|
61
|
+
- "分析故事依赖关系"
|
|
62
|
+
- "识别可并行故事"
|
|
63
|
+
- "并行执行故事"
|
|
64
|
+
- "处理代码冲突"
|
|
65
|
+
expectedOutput:
|
|
66
|
+
- "并行执行报告"
|
|
67
|
+
- "冲突检测结果"
|
|
68
|
+
successCriteria:
|
|
69
|
+
- "依赖分析正确"
|
|
70
|
+
- "并行执行成功"
|
|
71
|
+
qualityMetrics:
|
|
72
|
+
- "并行效率提升 >= 30%"
|
|
73
|
+
- "冲突检测准确率 >= 95%"
|
|
74
|
+
maxDuration: 900
|
|
75
|
+
|
|
76
|
+
successCriteria:
|
|
77
|
+
passRate: 85
|
|
78
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Benchmark-Executor
|
|
2
|
+
# 适用于: 测试 benchmark-executor skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-benchmark-executor"
|
|
7
|
+
description: "Benchmark-Executor Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: benchmark-executor
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "requirement-stage-evaluation"
|
|
14
|
+
input:
|
|
15
|
+
context: "执行 requirement 阶段 benchmarks 评测"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "加载 benchmarks 文件"
|
|
19
|
+
- "检测阶段产出物"
|
|
20
|
+
- "执行 Layer1 Schema 校验"
|
|
21
|
+
- "执行 Layer2 AI 审查"
|
|
22
|
+
- "计算评分与等级"
|
|
23
|
+
expectedOutput:
|
|
24
|
+
- "评测报告"
|
|
25
|
+
- "改进建议清单"
|
|
26
|
+
successCriteria:
|
|
27
|
+
- "benchmarks 加载完整"
|
|
28
|
+
- "Layer1+Layer2 执行完成"
|
|
29
|
+
- "评分计算正确"
|
|
30
|
+
qualityMetrics:
|
|
31
|
+
- "benchmarks 加载率 = 100%"
|
|
32
|
+
- "Layer1 执行率 = 100%"
|
|
33
|
+
- "评分准确率 >= 90%"
|
|
34
|
+
maxDuration: 600
|
|
35
|
+
|
|
36
|
+
- name: "design-stage-evaluation"
|
|
37
|
+
input:
|
|
38
|
+
context: "执行 design 阶段 benchmarks 评测"
|
|
39
|
+
complexity: 3
|
|
40
|
+
expectedBehaviors:
|
|
41
|
+
- "加载 design benchmarks"
|
|
42
|
+
- "检测产出物"
|
|
43
|
+
- "执行架构设计完整性检查"
|
|
44
|
+
- "执行设计-需求一致性检查"
|
|
45
|
+
expectedOutput:
|
|
46
|
+
- "design 评测报告"
|
|
47
|
+
- "一致性检查结果"
|
|
48
|
+
successCriteria:
|
|
49
|
+
- "产出物检测完整"
|
|
50
|
+
- "一致性检查执行"
|
|
51
|
+
qualityMetrics:
|
|
52
|
+
- "产出物检测率 = 100%"
|
|
53
|
+
- "一致性检查执行率 = 100%"
|
|
54
|
+
maxDuration: 600
|
|
55
|
+
|
|
56
|
+
- name: "implementation-stage-evaluation"
|
|
57
|
+
input:
|
|
58
|
+
context: "执行 implementation 阶段 benchmarks 评测"
|
|
59
|
+
complexity: 5
|
|
60
|
+
expectedBehaviors:
|
|
61
|
+
- "加载 implementation benchmarks"
|
|
62
|
+
- "执行 Layer1 验证"
|
|
63
|
+
- "执行代码-设计一致性检查"
|
|
64
|
+
- "执行代码质量基准检查"
|
|
65
|
+
expectedOutput:
|
|
66
|
+
- "implementation 评测报告"
|
|
67
|
+
- "代码质量报告"
|
|
68
|
+
- "覆盖率报告"
|
|
69
|
+
successCriteria:
|
|
70
|
+
- "Layer1 验证真实执行"
|
|
71
|
+
- "代码质量检查完整"
|
|
72
|
+
qualityMetrics:
|
|
73
|
+
- "Layer1 执行率 = 100%"
|
|
74
|
+
- "代码检查完整率 >= 90%"
|
|
75
|
+
maxDuration: 900
|
|
76
|
+
|
|
77
|
+
- name: "multi-system-evaluation"
|
|
78
|
+
input:
|
|
79
|
+
context: "执行多系统项目 benchmarks"
|
|
80
|
+
complexity: 5
|
|
81
|
+
expectedBehaviors:
|
|
82
|
+
- "识别所有子系统"
|
|
83
|
+
- "执行接口契约一致性检查"
|
|
84
|
+
- "执行跨系统设计一致性检查"
|
|
85
|
+
expectedOutput:
|
|
86
|
+
- "多系统评测报告"
|
|
87
|
+
- "契约一致性检查结果"
|
|
88
|
+
successCriteria:
|
|
89
|
+
- "子系统识别完整"
|
|
90
|
+
- "契约一致性检查执行"
|
|
91
|
+
qualityMetrics:
|
|
92
|
+
- "子系统识别率 = 100%"
|
|
93
|
+
- "契约检查执行率 = 100%"
|
|
94
|
+
maxDuration: 900
|
|
95
|
+
|
|
96
|
+
- name: "auto-trigger-evaluation"
|
|
97
|
+
input:
|
|
98
|
+
context: "产出物变更时自动触发 benchmarks"
|
|
99
|
+
complexity: 3
|
|
100
|
+
expectedBehaviors:
|
|
101
|
+
- "检测产出物变更"
|
|
102
|
+
- "自动触发 benchmarks 评测"
|
|
103
|
+
- "执行防抖处理"
|
|
104
|
+
successCriteria:
|
|
105
|
+
- "变更检测准确"
|
|
106
|
+
- "自动触发执行"
|
|
107
|
+
qualityMetrics:
|
|
108
|
+
- "变更检测率 = 100%"
|
|
109
|
+
- "自动触发执行率 = 100%"
|
|
110
|
+
maxDuration: 300
|
|
111
|
+
|
|
112
|
+
successCriteria:
|
|
113
|
+
passRate: 85
|
|
114
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Benchmark-Generator
|
|
2
|
+
# 适用于: 测试 benchmark-generator skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-benchmark-generator"
|
|
7
|
+
description: "Benchmark-Generator Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: benchmark-generator
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "generate-from-requirement"
|
|
14
|
+
input:
|
|
15
|
+
context: "根据需求文档生成 benchmark"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取需求文档"
|
|
19
|
+
- "提取关键场景"
|
|
20
|
+
- "生成测试用例"
|
|
21
|
+
- "生成 benchmark YAML"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "generated-benchmark.yaml"
|
|
24
|
+
- "测试用例清单"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "场景提取完整"
|
|
27
|
+
- "benchmark 格式正确"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "场景覆盖率 >= 90%"
|
|
30
|
+
- "格式正确率 = 100%"
|
|
31
|
+
maxDuration: 300
|
|
32
|
+
|
|
33
|
+
- name: "generate-from-design"
|
|
34
|
+
input:
|
|
35
|
+
context: "根据设计文档生成 benchmark"
|
|
36
|
+
complexity: 3
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "读取设计文档"
|
|
39
|
+
- "提取 API 接口"
|
|
40
|
+
- "生成 API 测试 benchmark"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "api-benchmark.yaml"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "API 提取完整"
|
|
45
|
+
- "benchmark 可执行"
|
|
46
|
+
qualityMetrics:
|
|
47
|
+
- "API 覆盖率 >= 90%"
|
|
48
|
+
maxDuration: 300
|
|
49
|
+
|
|
50
|
+
successCriteria:
|
|
51
|
+
passRate: 85
|
|
52
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Delivery-Stage
|
|
2
|
+
# 适用于: 测试 delivery-stage skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-delivery-stage"
|
|
7
|
+
description: "Delivery-Stage Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: delivery-stage
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "simple-delivery"
|
|
14
|
+
input:
|
|
15
|
+
context: "交付 {feature-name} 功能"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "执行发布检查清单"
|
|
19
|
+
- "执行一致性检查"
|
|
20
|
+
- "生成交付文档"
|
|
21
|
+
- "执行 Layer1 最终验证"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "release-notes.md"
|
|
24
|
+
- "deployment-guide.md"
|
|
25
|
+
- "consistency-report.md"
|
|
26
|
+
- "delivery-checklist.md"
|
|
27
|
+
successCriteria:
|
|
28
|
+
- "发布检查清单完整执行"
|
|
29
|
+
- "一致性检查通过"
|
|
30
|
+
- "交付文档完整"
|
|
31
|
+
qualityMetrics:
|
|
32
|
+
- "文档完整率 >= 90%"
|
|
33
|
+
- "交付清单执行率 = 100%"
|
|
34
|
+
maxDuration: 300
|
|
35
|
+
|
|
36
|
+
- name: "multi-system-delivery"
|
|
37
|
+
input:
|
|
38
|
+
context: "交付 {feature} 系统(backend/frontend/mobile)"
|
|
39
|
+
complexity: 5
|
|
40
|
+
expectedBehaviors:
|
|
41
|
+
- "识别所有子系统"
|
|
42
|
+
- "执行各子系统发布检查"
|
|
43
|
+
- "执行集成一致性检查"
|
|
44
|
+
- "生成多系统部署方案"
|
|
45
|
+
- "生成回滚方案"
|
|
46
|
+
expectedOutput:
|
|
47
|
+
- "release-notes.md"
|
|
48
|
+
- "deployment-plan.md"
|
|
49
|
+
- "rollback-plan.md"
|
|
50
|
+
- "integration-report.md"
|
|
51
|
+
successCriteria:
|
|
52
|
+
- "所有子系统交付完整"
|
|
53
|
+
- "部署方案可行"
|
|
54
|
+
- "回滚方案完整"
|
|
55
|
+
qualityMetrics:
|
|
56
|
+
- "子系统覆盖率 = 100%"
|
|
57
|
+
- "回滚方案完整率 >= 90%"
|
|
58
|
+
maxDuration: 600
|
|
59
|
+
|
|
60
|
+
- name: "ai-delivery"
|
|
61
|
+
input:
|
|
62
|
+
context: "交付 AI 功能(含模型)"
|
|
63
|
+
complexity: 5
|
|
64
|
+
expectedBehaviors:
|
|
65
|
+
- "执行模型效果验收"
|
|
66
|
+
- "执行模型性能验收"
|
|
67
|
+
- "生成模型卡片"
|
|
68
|
+
- "生成 A/B 测试方案"
|
|
69
|
+
- "生成监控方案"
|
|
70
|
+
expectedOutput:
|
|
71
|
+
- "model-card.md"
|
|
72
|
+
- "acceptance-report.md"
|
|
73
|
+
- "ab-test-plan.md"
|
|
74
|
+
- "monitoring-plan.md"
|
|
75
|
+
successCriteria:
|
|
76
|
+
- "效果指标达标"
|
|
77
|
+
- "模型卡片完整"
|
|
78
|
+
- "监控方案完整"
|
|
79
|
+
qualityMetrics:
|
|
80
|
+
- "效果验收执行率 = 100%"
|
|
81
|
+
- "模型卡片完整率 >= 90%"
|
|
82
|
+
maxDuration: 600
|
|
83
|
+
|
|
84
|
+
- name: "safety-audit"
|
|
85
|
+
input:
|
|
86
|
+
context: "执行生产数据库迁移"
|
|
87
|
+
complexity: 3
|
|
88
|
+
expectedBehaviors:
|
|
89
|
+
- "执行安全审计"
|
|
90
|
+
- "验证数据备份方案"
|
|
91
|
+
- "验证回滚方案"
|
|
92
|
+
- "生成人工确认清单"
|
|
93
|
+
expectedOutput:
|
|
94
|
+
- "safety-audit.md"
|
|
95
|
+
- "backup-plan.md"
|
|
96
|
+
- "rollback-plan.md"
|
|
97
|
+
- "manual-checklist.md"
|
|
98
|
+
successCriteria:
|
|
99
|
+
- "安全审计执行"
|
|
100
|
+
- "备份方案验证通过"
|
|
101
|
+
- "人工确认清单完整"
|
|
102
|
+
qualityMetrics:
|
|
103
|
+
- "安全审计执行率 = 100%"
|
|
104
|
+
- "人工确认清单完整率 = 100%"
|
|
105
|
+
maxDuration: 450
|
|
106
|
+
|
|
107
|
+
- name: "practice-log"
|
|
108
|
+
input:
|
|
109
|
+
context: "记录交付后的实践日志和进化信号"
|
|
110
|
+
complexity: 3
|
|
111
|
+
expectedBehaviors:
|
|
112
|
+
- "收集实践日志"
|
|
113
|
+
- "检测进化信号"
|
|
114
|
+
- "生成进化建议"
|
|
115
|
+
- "更新 practice-log.json"
|
|
116
|
+
expectedOutput:
|
|
117
|
+
- "practice-log.json"
|
|
118
|
+
- "evolution-signal.md"
|
|
119
|
+
- "improvement-suggestion.md"
|
|
120
|
+
successCriteria:
|
|
121
|
+
- "实践日志格式正确"
|
|
122
|
+
- "进化信号检测正确"
|
|
123
|
+
qualityMetrics:
|
|
124
|
+
- "实践日志完整率 >= 90%"
|
|
125
|
+
- "进化信号检出率 = 100%"
|
|
126
|
+
maxDuration: 300
|
|
127
|
+
|
|
128
|
+
successCriteria:
|
|
129
|
+
passRate: 85
|
|
130
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Design-Stage
|
|
2
|
+
# 适用于: 测试 design-stage skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-design-stage"
|
|
7
|
+
description: "Design-Stage Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: design-stage
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "simple-design"
|
|
14
|
+
input:
|
|
15
|
+
context: "{feature-name} 功能设计"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "执行架构设计(模块划分、接口定义)"
|
|
19
|
+
- "设计 API 接口(RESTful 规范)"
|
|
20
|
+
- "设计数据库模型(ER 图、表结构)"
|
|
21
|
+
- "定义安全机制(认证、授权、加密)"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "design.md"
|
|
24
|
+
- "api.yaml"
|
|
25
|
+
- "db-schema.sql"
|
|
26
|
+
- "security-design.md"
|
|
27
|
+
successCriteria:
|
|
28
|
+
- "架构设计合理"
|
|
29
|
+
- "API 设计符合 RESTful 规范"
|
|
30
|
+
- "数据库设计满足范式要求"
|
|
31
|
+
qualityMetrics:
|
|
32
|
+
- "设计完整率 >= 90%"
|
|
33
|
+
- "API 规范符合率 >= 95%"
|
|
34
|
+
maxDuration: 600
|
|
35
|
+
|
|
36
|
+
- name: "multi-system-design"
|
|
37
|
+
input:
|
|
38
|
+
context: "{feature} 设计,包含 {subsystems}"
|
|
39
|
+
complexity: 5
|
|
40
|
+
expectedBehaviors:
|
|
41
|
+
- "识别所有子系统"
|
|
42
|
+
- "设计整体架构"
|
|
43
|
+
- "定义系统间契约"
|
|
44
|
+
- "设计依赖顺序"
|
|
45
|
+
- "执行团队对抗审查"
|
|
46
|
+
expectedOutput:
|
|
47
|
+
- "design.md"
|
|
48
|
+
- "backend.md"
|
|
49
|
+
- "frontend.md"
|
|
50
|
+
- "contracts/api.yaml"
|
|
51
|
+
successCriteria:
|
|
52
|
+
- "识别所有子系统"
|
|
53
|
+
- "系统间契约定义清晰"
|
|
54
|
+
- "依赖顺序正确"
|
|
55
|
+
qualityMetrics:
|
|
56
|
+
- "子系统识别率 = 100%"
|
|
57
|
+
- "契约完整率 >= 90%"
|
|
58
|
+
maxDuration: 1200
|
|
59
|
+
|
|
60
|
+
- name: "ai-design"
|
|
61
|
+
input:
|
|
62
|
+
context: "AI 功能设计,使用 {model-type}"
|
|
63
|
+
complexity: 5
|
|
64
|
+
expectedBehaviors:
|
|
65
|
+
- "识别 AI/模型组件"
|
|
66
|
+
- "设计模型选型方案"
|
|
67
|
+
- "设计效果评测方案"
|
|
68
|
+
- "设计推理服务架构"
|
|
69
|
+
- "设计降级方案"
|
|
70
|
+
expectedOutput:
|
|
71
|
+
- "design.md"
|
|
72
|
+
- "ai-model.md"
|
|
73
|
+
- "evaluation.md"
|
|
74
|
+
- "fallback.md"
|
|
75
|
+
successCriteria:
|
|
76
|
+
- "AI 组件设计清晰"
|
|
77
|
+
- "效果指标可测量"
|
|
78
|
+
- "降级方案完整"
|
|
79
|
+
qualityMetrics:
|
|
80
|
+
- "AI 组件完整率 >= 90%"
|
|
81
|
+
- "评测方案完整率 >= 90%"
|
|
82
|
+
maxDuration: 900
|
|
83
|
+
|
|
84
|
+
- name: "data-modeling-design"
|
|
85
|
+
input:
|
|
86
|
+
context: "数据仓库设计,支持 BI 分析"
|
|
87
|
+
complexity: 5
|
|
88
|
+
expectedBehaviors:
|
|
89
|
+
- "识别数据源"
|
|
90
|
+
- "设计数据模型(维度建模)"
|
|
91
|
+
- "设计 ETL 流程"
|
|
92
|
+
- "设计数据质量检查机制"
|
|
93
|
+
expectedOutput:
|
|
94
|
+
- "design.md"
|
|
95
|
+
- "data-model.md"
|
|
96
|
+
- "etl-pipeline.md"
|
|
97
|
+
- "data-quality.md"
|
|
98
|
+
successCriteria:
|
|
99
|
+
- "数据源识别完整"
|
|
100
|
+
- "ETL 流程清晰可执行"
|
|
101
|
+
qualityMetrics:
|
|
102
|
+
- "数据源完整率 >= 90%"
|
|
103
|
+
- "ETL 完整率 >= 90%"
|
|
104
|
+
maxDuration: 900
|
|
105
|
+
|
|
106
|
+
- name: "high-concurrency-design"
|
|
107
|
+
input:
|
|
108
|
+
context: "秒杀系统设计,支持 10 万 QPS"
|
|
109
|
+
complexity: 5
|
|
110
|
+
expectedBehaviors:
|
|
111
|
+
- "设计缓存策略"
|
|
112
|
+
- "设计限流降级方案"
|
|
113
|
+
- "设计异步处理"
|
|
114
|
+
- "设计数据库优化"
|
|
115
|
+
- "设计容量规划"
|
|
116
|
+
expectedOutput:
|
|
117
|
+
- "design.md"
|
|
118
|
+
- "cache-design.md"
|
|
119
|
+
- "rate-limiting.md"
|
|
120
|
+
- "scaling.md"
|
|
121
|
+
successCriteria:
|
|
122
|
+
- "缓存策略合理"
|
|
123
|
+
- "限流降级方案完整"
|
|
124
|
+
qualityMetrics:
|
|
125
|
+
- "性能考虑完整率 >= 90%"
|
|
126
|
+
- "高可用设计合理率 >= 90%"
|
|
127
|
+
maxDuration: 900
|
|
128
|
+
|
|
129
|
+
successCriteria:
|
|
130
|
+
passRate: 85
|
|
131
|
+
avgFieldCompletion: 90
|