@chongyan/autospec 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.en.md +447 -321
- package/README.md +418 -286
- package/knowledge/01-principles/00-principles-hierarchy.md +247 -0
- package/knowledge/01-principles/01-first-principles.md +241 -0
- package/knowledge/01-principles/02-strategic-principles.md +286 -0
- package/knowledge/01-principles/03-tactical-principles.md +385 -0
- package/knowledge/01-principles/04-operational-principles.md +275 -0
- package/knowledge/01-principles/05-domain-principles.md +539 -0
- package/knowledge/01-principles/06-methodology-principles.md +281 -0
- package/knowledge/01-principles/07-cognitive-principles.md +277 -0
- package/knowledge/01-principles/08-auto-fix-principles.md +320 -0
- package/knowledge/01-principles/09-constitution.md +220 -0
- package/knowledge/{principles/evolution.md → 01-principles/10-evolution-mechanism.md} +160 -14
- package/knowledge/01-principles/README.en.md +385 -0
- package/knowledge/01-principles/README.md +385 -0
- package/knowledge/{process/overview.md → 02-process/00-overview.md} +90 -5
- package/knowledge/02-process/README.en.md +143 -0
- package/knowledge/02-process/README.md +186 -0
- package/knowledge/{guides/support/pipeline-protocol.md → 03-guides/00-pipeline-protocol.md} +10 -10
- package/knowledge/{guides/support/team-orchestrator.md → 03-guides/01-team-orchestrator.md} +53 -8
- package/knowledge/{guides/stages/requirement-analyzer.md → 03-guides/02-analyze-requirement.md} +3 -3
- package/knowledge/{guides/stages/ai-effect-evaluator.md → 03-guides/08-evaluate-ai-effect.md} +14 -7
- package/knowledge/{guides/support/skill-distiller.md → 03-guides/19-distill-skill.md} +3 -3
- package/knowledge/{guides/support/skill-updater.md → 03-guides/20-update-skill.md} +1 -1
- package/knowledge/{guides/support/methodology-extractor.md → 03-guides/22-extract-methodology.md} +2 -2
- package/knowledge/{guides/support/complexity-assessor.md → 03-guides/24-assess-complexity.md} +6 -4
- package/knowledge/{guides/support/tech-stack-analyzer.md → 03-guides/26-analyze-tech-stack.md} +1 -1
- package/knowledge/{guides/domain-driven-design.md → 03-guides/42-apply-ddd.md} +1 -1
- package/knowledge/{process/ai-sdlc.md → 03-guides/43-run-ai-sdlc.md} +1 -1
- package/knowledge/{guides/knowledge-management.md → 03-guides/44-manage-knowledge.md} +4 -4
- package/knowledge/03-guides/README.en.md +212 -0
- package/knowledge/03-guides/README.md +212 -0
- package/knowledge/{checklists/requirement.md → 04-checklists/00-requirement.md} +1 -1
- package/knowledge/{checklists/design.md → 04-checklists/01-design.md} +1 -1
- package/knowledge/{checklists/code.md → 04-checklists/02-code.md} +16 -1
- package/knowledge/{checklists/release.md → 04-checklists/04-release.md} +1 -1
- package/knowledge/04-checklists/README.en.md +119 -0
- package/knowledge/04-checklists/README.md +123 -0
- package/knowledge/{config/validation-patterns.yaml → 05-config/00-validation-patterns.yaml} +1 -1
- package/knowledge/{config/team-tasks.yaml → 05-config/02-team-tasks.yaml} +2 -2
- package/knowledge/05-config/03-role-composition.yaml +346 -0
- package/knowledge/{config/skill-compositions.yaml → 05-config/05-skill-compositions.yaml} +24 -24
- package/knowledge/05-config/README.en.md +54 -0
- package/knowledge/05-config/README.md +132 -0
- package/knowledge/06-environment/00-template-registry.md +310 -0
- package/knowledge/06-environment/01-detection-patterns.yaml +1692 -0
- package/knowledge/{environment → 06-environment}/README.en.md +4 -0
- package/knowledge/{environment → 06-environment}/README.md +66 -25
- package/knowledge/{standards/coding-style.md → 07-standards/00-coding-style.md} +123 -4
- package/knowledge/{standards/code-review.md → 07-standards/01-code-review.md} +3 -3
- package/knowledge/{standards/data-consistency.md → 07-standards/02-data-consistency.md} +1 -1
- package/knowledge/{standards/document-versioning.md → 07-standards/03-document-versioning.md} +1 -1
- package/knowledge/{standards/risk-detection.md → 07-standards/04-risk-detection.md} +5 -5
- package/knowledge/07-standards/README.en.md +119 -0
- package/knowledge/07-standards/README.md +123 -0
- package/knowledge/08-organization/00-vision-mission.md +113 -0
- package/knowledge/{organization/ai-native-team.md → 08-organization/01-ai-native-culture.md} +1 -1
- package/knowledge/{organization/team-metrics.md → 08-organization/02-team-metrics.md} +1 -1
- package/knowledge/08-organization/03-committee-structure.md +54 -0
- package/knowledge/08-organization/04-governance-metrics.md +55 -0
- package/knowledge/08-organization/05-improvement-process.md +71 -0
- package/knowledge/08-organization/README.en.md +165 -0
- package/knowledge/08-organization/README.md +165 -0
- package/knowledge/09-templates/00-requirement-proposal.md +344 -0
- package/knowledge/09-templates/01-architecture-design.md +494 -0
- package/knowledge/09-templates/02-api-design.md +408 -0
- package/knowledge/09-templates/03-database-design.md +313 -0
- package/knowledge/09-templates/04-product-design.md +237 -0
- package/knowledge/09-templates/05-domain-business.md +388 -0
- package/knowledge/09-templates/06-test-design.md +268 -0
- package/knowledge/09-templates/07-evaluation-design.md +372 -0
- package/knowledge/09-templates/08-component-knowledge.md +272 -0
- package/knowledge/09-templates/09-best-practices.md +218 -0
- package/knowledge/{environment/middleware-knowledge.md → 09-templates/10-middleware-knowledge.md} +106 -1
- package/knowledge/09-templates/README.en.md +222 -0
- package/knowledge/09-templates/README.md +216 -0
- package/knowledge/README.en.md +372 -0
- package/knowledge/README.md +354 -99
- package/package.json +1 -1
- package/plugins/.claude-plugin/plugin.json +460 -81
- package/plugins/agents/roles/ceo.md +1 -1
- package/plugins/agents/roles/product-owner.md +1 -1
- package/plugins/agents/roles/tech-lead.md +1 -1
- package/plugins/agents/support/consistency-checker.md +36 -3
- package/plugins/agents/support/monitoring-agent.md +215 -0
- package/plugins/agents/support/safety-auditor.md +2 -2
- package/plugins/agents/support/stage-gate-evaluator.md +95 -11
- package/plugins/agents/support/test-coverage-reviewer.md +1 -1
- package/plugins/benchmarks/templates/README.md +165 -13
- package/plugins/benchmarks/templates/commands/apply-template.yaml +108 -0
- package/plugins/benchmarks/templates/commands/archive-template.yaml +65 -0
- package/plugins/benchmarks/templates/commands/env-export-template.yaml +64 -0
- package/plugins/benchmarks/templates/commands/env-sync-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/env-template-template.yaml +96 -0
- package/plugins/benchmarks/templates/commands/env-template.yaml +58 -0
- package/plugins/benchmarks/templates/commands/env-update-template.yaml +110 -0
- package/plugins/benchmarks/templates/commands/env-validate-template.yaml +95 -0
- package/plugins/benchmarks/templates/commands/field-evolve-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/project-evolve-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/propose-template.yaml +88 -0
- package/plugins/benchmarks/templates/commands/review-template.yaml +124 -0
- package/plugins/benchmarks/templates/commands/run-template.yaml +127 -0
- package/plugins/benchmarks/templates/commands/test-template.yaml +149 -0
- package/plugins/benchmarks/templates/pipeline/experiment-template.yaml +92 -0
- package/plugins/benchmarks/templates/pipeline/hotfix-template.yaml +81 -0
- package/plugins/benchmarks/templates/skills/agile-iteration-template.yaml +78 -0
- package/plugins/benchmarks/templates/skills/benchmark-executor-template.yaml +114 -0
- package/plugins/benchmarks/templates/skills/benchmark-generator-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/delivery-stage-template.yaml +130 -0
- package/plugins/benchmarks/templates/skills/design-stage-template.yaml +131 -0
- package/plugins/benchmarks/templates/skills/experiment-iteration-template.yaml +60 -0
- package/plugins/benchmarks/templates/skills/exploration-phase-template.yaml +114 -0
- package/plugins/benchmarks/templates/skills/field-evolve-analyzer-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/field-evolve-distiller-template.yaml +34 -0
- package/plugins/benchmarks/templates/skills/field-evolve-executor-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/field-evolve-fixer-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/field-evolve-learner-template.yaml +33 -0
- package/plugins/benchmarks/templates/skills/field-evolve-scanner-template.yaml +74 -0
- package/plugins/benchmarks/templates/skills/field-evolve-template.yaml +71 -0
- package/plugins/benchmarks/templates/skills/field-evolve-verifier-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/hotfix-iteration-template.yaml +54 -0
- package/plugins/benchmarks/templates/skills/implementation-stage-template.yaml +127 -0
- package/plugins/benchmarks/templates/skills/layer1-validation-template.yaml +121 -0
- package/plugins/benchmarks/templates/skills/project-evolve-analyzer-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/project-evolve-fixer-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/project-evolve-generator-template.yaml +34 -0
- package/plugins/benchmarks/templates/skills/project-evolve-learner-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/project-evolve-reviewer-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/project-evolve-scanner-template.yaml +75 -0
- package/plugins/benchmarks/templates/skills/project-evolve-template.yaml +72 -0
- package/plugins/benchmarks/templates/skills/project-evolve-verifier-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/skill-forge-template.yaml +117 -0
- package/plugins/benchmarks/templates/skills/startup-guard-template.yaml +103 -0
- package/plugins/benchmarks/templates/skills/testing-stage-template.yaml +146 -0
- package/plugins/benchmarks/templates/skills/waterfall-iteration-template.yaml +55 -0
- package/plugins/commands/README.en.md +2 -2
- package/plugins/commands/README.md +2 -2
- package/plugins/commands/apply.md +102 -16
- package/plugins/commands/archive.md +60 -4
- package/plugins/commands/env-sync.md +1047 -406
- package/plugins/commands/env-template.md +11 -135
- package/plugins/commands/env-update.md +1 -1
- package/plugins/commands/env-validate.md +3 -3
- package/plugins/commands/explore.md +118 -1
- package/plugins/commands/field-evolve.md +51 -175
- package/plugins/commands/project-evolve.md +167 -68
- package/plugins/commands/propose.md +97 -6
- package/plugins/commands/review.md +5 -5
- package/plugins/commands/run.md +841 -13
- package/plugins/commands/status.md +138 -17
- package/plugins/commands/test.md +389 -0
- package/plugins/hooks/constitution-guard.js +1 -1
- package/plugins/hooks/environment-autocommit.js +366 -24
- package/plugins/hooks/environment-manager.js +3 -2
- package/plugins/hooks/execution-tracker.js +109 -4
- package/plugins/hooks/layer1-validator.js +117 -1
- package/plugins/hooks/lib/auto-fix-loop.js +605 -0
- package/plugins/hooks/lib/environment-config-loader.js +11 -7
- package/plugins/hooks/lib/hook-state-manager.js +98 -0
- package/plugins/hooks/lib/memory-extractor.js +27 -5
- package/plugins/hooks/lib/memory-manager.js +1 -1
- package/plugins/hooks/lib/test-auto-fix.test.js +194 -0
- package/plugins/hooks/monitoring-trigger.js +467 -0
- package/plugins/skills/README.en.md +15 -3
- package/plugins/skills/README.md +21 -11
- package/plugins/skills/agile-iteration/SKILL.md +187 -0
- package/plugins/skills/delivery-stage/SKILL.md +133 -12
- package/plugins/skills/design-stage/SKILL.md +103 -12
- package/plugins/skills/experiment-evaluator/SKILL.md +271 -0
- package/plugins/skills/experiment-iteration/SKILL.md +154 -0
- package/plugins/skills/exploration-phase/SKILL.md +93 -10
- package/plugins/skills/field-evolve-analyzer/SKILL.md +65 -0
- package/plugins/skills/field-evolve-distiller/SKILL.md +66 -0
- package/plugins/skills/field-evolve-executor/SKILL.md +94 -0
- package/plugins/skills/field-evolve-executor/executor.js +342 -0
- package/plugins/skills/field-evolve-fixer/SKILL.md +69 -0
- package/plugins/skills/field-evolve-learner/SKILL.md +65 -0
- package/plugins/skills/field-evolve-scanner/SKILL.md +87 -0
- package/plugins/skills/field-evolve-scanner/scripts/fallback-scanner.js +288 -0
- package/plugins/skills/field-evolve-verifier/SKILL.md +64 -0
- package/plugins/skills/hotfix-iteration/SKILL.md +279 -0
- package/plugins/skills/implementation-stage/SKILL.md +156 -15
- package/plugins/skills/layer1-validation/SKILL.md +1 -1
- package/plugins/skills/pending-dashboard/SKILL.md +9 -8
- package/plugins/skills/project-evolve-analyzer/SKILL.md +95 -0
- package/plugins/skills/project-evolve-fixer/SKILL.md +99 -0
- package/plugins/skills/project-evolve-generator/SKILL.md +149 -0
- package/plugins/skills/project-evolve-learner/SKILL.md +103 -0
- package/plugins/skills/project-evolve-reviewer/SKILL.md +104 -0
- package/plugins/skills/project-evolve-scanner/SKILL.md +95 -0
- package/plugins/skills/project-evolve-scanner/scripts/dependency-reuse-checker.js +395 -0
- package/plugins/skills/project-evolve-scanner/scripts/subsystem-coverage.js +315 -0
- package/plugins/skills/project-evolve-verifier/SKILL.md +105 -0
- package/plugins/skills/requirement-stage/SKILL.md +47 -13
- package/plugins/skills/skill-forge/SKILL.md +2 -2
- package/plugins/skills/testing-stage/SKILL.md +583 -8
- package/plugins/skills/waterfall-iteration/SKILL.md +115 -0
- package/scripts/cli/index.js +1 -1
- package/scripts/cli/init.js +30 -4
- package/scripts/cli/list.js +3 -2
- package/scripts/config/commands.config.js +8 -8
- package/scripts/config/hooks.config.js +1 -1
- package/scripts/install/constants.js +204 -165
- package/scripts/state.js +210 -1
- package/knowledge/config/README.en.md +0 -44
- package/knowledge/config/README.md +0 -44
- package/knowledge/config/role-composition.yaml +0 -98
- package/knowledge/config/team-triggers.yaml +0 -198
- package/knowledge/domain/README.md +0 -115
- package/knowledge/domain/flows/README.md +0 -194
- package/knowledge/domain/glossary.md +0 -143
- package/knowledge/domain/rules.md +0 -138
- package/knowledge/environment/component-knowledge.md +0 -316
- package/knowledge/environment/detection-patterns.yaml +0 -502
- package/knowledge/environment/template-registry.md +0 -321
- package/knowledge/guides/requirement-engineering.md +0 -329
- package/knowledge/guides/system-design.md +0 -352
- package/knowledge/principles/constitution.md +0 -134
- package/knowledge/principles/core-principles.md +0 -368
- package/knowledge/principles/design-philosophy.md +0 -877
- package/knowledge/process/README.en.md +0 -38
- package/knowledge/process/README.md +0 -48
- package/knowledge/templates/ai-evaluation.md +0 -150
- package/knowledge/templates/api-design.md +0 -117
- package/knowledge/templates/database-design.md +0 -132
- package/knowledge/templates/domain-driven-design.md +0 -321
- package/knowledge/templates/product-proposal.md +0 -201
- package/knowledge/templates/system-design.md +0 -227
- package/knowledge/templates/task-breakdown.md +0 -107
- package/knowledge/templates/test-case.md +0 -170
- package/plugins/commands/validate.md +0 -108
- package/plugins/skills/benchmark-executor/README.md +0 -93
- package/plugins/skills/evolution-process/SKILL.md +0 -291
- package/plugins/skills/project-evolution/SKILL.md +0 -847
- package/scripts/evolution/evolution-router.js +0 -273
- package/scripts/evolution/evolution-signal-collector.js +0 -307
- package/scripts/evolution/knowledge-loader.js +0 -346
- package/scripts/evolution/marketplace.js +0 -317
- package/scripts/evolution/version-manager.js +0 -371
- /package/knowledge/{process → 02-process}/01-requirement.md +0 -0
- /package/knowledge/{process → 02-process}/02-design.md +0 -0
- /package/knowledge/{process → 02-process}/03-implementation.md +0 -0
- /package/knowledge/{process → 02-process}/04-review.md +0 -0
- /package/knowledge/{process → 02-process}/05-testing.md +0 -0
- /package/knowledge/{process → 02-process}/06-delivery.md +0 -0
- /package/knowledge/{guides/stages/design-planner.md → 03-guides/03-design-solution.md} +0 -0
- /package/knowledge/{guides/stages/code-implementer.md → 03-guides/04-implement-code.md} +0 -0
- /package/knowledge/{guides/stages/test-planner.md → 03-guides/05-plan-testing.md} +0 -0
- /package/knowledge/{guides/stages/test-generator.md → 03-guides/06-generate-tests.md} +0 -0
- /package/knowledge/{guides/stages/release-checker.md → 03-guides/07-check-release.md} +0 -0
- /package/knowledge/{guides/stages/requirement-reviewer.md → 03-guides/09-review-requirement.md} +0 -0
- /package/knowledge/{guides/stages/design-reviewer.md → 03-guides/10-review-design.md} +0 -0
- /package/knowledge/{guides/stages/code-reviewer.md → 03-guides/11-review-code.md} +0 -0
- /package/knowledge/{guides/stages/test-reviewer.md → 03-guides/12-review-testing.md} +0 -0
- /package/knowledge/{guides/stages/security-reviewer.md → 03-guides/13-audit-security.md} +0 -0
- /package/knowledge/{guides/stages/consistency-checker.md → 03-guides/14-check-consistency.md} +0 -0
- /package/knowledge/{guides/stages/unit-test-runner.md → 03-guides/15-run-unit-tests.md} +0 -0
- /package/knowledge/{guides/stages/integration-test-runner.md → 03-guides/16-run-integration-tests.md} +0 -0
- /package/knowledge/{guides/stages/test-context-analyzer.md → 03-guides/17-analyze-test-context.md} +0 -0
- /package/knowledge/{guides/support/practice-logger.md → 03-guides/18-log-practice.md} +0 -0
- /package/knowledge/{guides/support/skill-validator.md → 03-guides/21-validate-skill.md} +0 -0
- /package/knowledge/{guides/support/scope-inference.md → 03-guides/23-infer-scope.md} +0 -0
- /package/knowledge/{guides/support/component-discovery.md → 03-guides/25-discover-component.md} +0 -0
- /package/knowledge/{guides/support/environment-scanner.md → 03-guides/27-scan-environment.md} +0 -0
- /package/knowledge/{guides/support/environment-validator.md → 03-guides/28-validate-environment.md} +0 -0
- /package/knowledge/{guides/support/knowledge-generator.md → 03-guides/29-generate-knowledge.md} +0 -0
- /package/knowledge/{guides/support/ai-capability-analyzer.md → 03-guides/30-analyze-ai-capability.md} +0 -0
- /package/knowledge/{guides/support/ai-component-analyzer.md → 03-guides/31-analyze-ai-component.md} +0 -0
- /package/knowledge/{guides/support/ai-agent-analyzer.md → 03-guides/32-analyze-ai-agent.md} +0 -0
- /package/knowledge/{guides/support/ai-rag-analyzer.md → 03-guides/33-analyze-ai-rag.md} +0 -0
- /package/knowledge/{guides/support/ai-task-assessor.md → 03-guides/34-assess-ai-task.md} +0 -0
- /package/knowledge/{guides/support/ai-pipeline-evaluator.md → 03-guides/35-evaluate-ai-pipeline.md} +0 -0
- /package/knowledge/{guides/support/ai-artifact-evaluator.md → 03-guides/36-evaluate-ai-artifact.md} +0 -0
- /package/knowledge/{guides/support/ai-evaluation-planner.md → 03-guides/37-plan-ai-evaluation.md} +0 -0
- /package/knowledge/{guides/support/ai-path-evaluator.md → 03-guides/38-evaluate-ai-path.md} +0 -0
- /package/knowledge/{guides/support/ai-data-validator.md → 03-guides/39-validate-ai-data.md} +0 -0
- /package/knowledge/{guides/support/ai-anomaly-analyzer.md → 03-guides/40-detect-ai-anomaly.md} +0 -0
- /package/knowledge/{guides/support/ai-test-diagnostics.md → 03-guides/41-diagnose-ai-test.md} +0 -0
- /package/knowledge/{guides/support/test-runner.md → 03-guides/45-test-runner.md} +0 -0
- /package/knowledge/{checklists/test.md → 04-checklists/03-test.md} +0 -0
- /package/knowledge/{config/team-stage.yaml → 05-config/01-team-stage.yaml} +0 -0
- /package/knowledge/{config/role-extensions.yaml → 05-config/04-role-extensions.yaml} +0 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Experiment-Iteration
|
|
2
|
+
# 适用于: 测试 experiment-iteration skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-experiment-iteration"
|
|
7
|
+
description: "Experiment-Iteration Skill 基准测试 - 实验模式验证流程"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: experiment-iteration
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "simple-experiment"
|
|
14
|
+
input:
|
|
15
|
+
context: "验证 {technology} 用于 {use-case} 的可行性"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "定义假设"
|
|
19
|
+
- "设计实验"
|
|
20
|
+
- "实现原型"
|
|
21
|
+
- "执行评测"
|
|
22
|
+
- "验证假设"
|
|
23
|
+
expectedOutput:
|
|
24
|
+
- "hypothesis.md"
|
|
25
|
+
- "prototype 代码"
|
|
26
|
+
- "evaluation-report.md"
|
|
27
|
+
- "conclusion.md"
|
|
28
|
+
successCriteria:
|
|
29
|
+
- "假设定义清晰"
|
|
30
|
+
- "评测执行完整"
|
|
31
|
+
- "结论明确"
|
|
32
|
+
qualityMetrics:
|
|
33
|
+
- "假设验证完整率 = 100%"
|
|
34
|
+
- "评测执行率 = 100%"
|
|
35
|
+
maxDuration: 1800
|
|
36
|
+
|
|
37
|
+
- name: "ai-experiment"
|
|
38
|
+
input:
|
|
39
|
+
context: "验证 AI 模型效果"
|
|
40
|
+
complexity: 5
|
|
41
|
+
expectedBehaviors:
|
|
42
|
+
- "定义效果指标"
|
|
43
|
+
- "构建评测数据集"
|
|
44
|
+
- "训练/微调模型"
|
|
45
|
+
- "执行效果评测"
|
|
46
|
+
expectedOutput:
|
|
47
|
+
- "evaluation-plan.md"
|
|
48
|
+
- "dataset.jsonl"
|
|
49
|
+
- "evaluation-report.md"
|
|
50
|
+
successCriteria:
|
|
51
|
+
- "效果指标可测量"
|
|
52
|
+
- "评测数据集有代表性"
|
|
53
|
+
qualityMetrics:
|
|
54
|
+
- "指标完整率 >= 90%"
|
|
55
|
+
- "数据集覆盖率 >= 80%"
|
|
56
|
+
maxDuration: 3600
|
|
57
|
+
|
|
58
|
+
successCriteria:
|
|
59
|
+
passRate: 80
|
|
60
|
+
avgFieldCompletion: 85
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Exploration-Phase
|
|
2
|
+
# 适用于: 测试 exploration-phase skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-exploration-phase"
|
|
7
|
+
description: "Exploration-Phase Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: exploration-phase
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "simple-exploration"
|
|
14
|
+
input:
|
|
15
|
+
context: "实现 {feature-name} 功能"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "执行复杂度评估(simple)"
|
|
19
|
+
- "启动 2 个 Agent(CEO + 产品负责人)"
|
|
20
|
+
- "执行 2-3 轮澄清"
|
|
21
|
+
- "每轮最多 5 个问题"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "clarifications.md"
|
|
24
|
+
- "requirement.md"
|
|
25
|
+
- "复杂度评估结果"
|
|
26
|
+
successCriteria:
|
|
27
|
+
- "澄清轮次 2-3 轮"
|
|
28
|
+
- "问题数 <= 5 个/轮"
|
|
29
|
+
- "需求文档结构完整"
|
|
30
|
+
qualityMetrics:
|
|
31
|
+
- "字段完整率 >= 90%"
|
|
32
|
+
- "澄清问题质量 >= 80%"
|
|
33
|
+
maxDuration: 600
|
|
34
|
+
|
|
35
|
+
- name: "medium-exploration"
|
|
36
|
+
input:
|
|
37
|
+
context: "实现 {feature} 功能,包含多个模块"
|
|
38
|
+
complexity: 3
|
|
39
|
+
expectedBehaviors:
|
|
40
|
+
- "执行复杂度评估(medium)"
|
|
41
|
+
- "启动 3 个 Agent(CEO + 产品 + 技术)"
|
|
42
|
+
- "执行 3-4 轮澄清"
|
|
43
|
+
- "识别技术风险"
|
|
44
|
+
expectedOutput:
|
|
45
|
+
- "clarifications.md"
|
|
46
|
+
- "requirement.md"
|
|
47
|
+
- "技术风险评估"
|
|
48
|
+
successCriteria:
|
|
49
|
+
- "澄清轮次 3-4 轮"
|
|
50
|
+
- "技术风险识别完整"
|
|
51
|
+
qualityMetrics:
|
|
52
|
+
- "风险识别率 >= 90%"
|
|
53
|
+
maxDuration: 900
|
|
54
|
+
|
|
55
|
+
- name: "complex-exploration"
|
|
56
|
+
input:
|
|
57
|
+
context: "实现 {feature},包含 AI 模型、后端 API、前端界面"
|
|
58
|
+
complexity: 5
|
|
59
|
+
expectedBehaviors:
|
|
60
|
+
- "执行复杂度评估(complex)"
|
|
61
|
+
- "启动 4+ 个 Agent"
|
|
62
|
+
- "执行 5-6 轮澄清"
|
|
63
|
+
- "识别多系统边界"
|
|
64
|
+
- "定义系统间契约"
|
|
65
|
+
expectedOutput:
|
|
66
|
+
- "clarifications.md"
|
|
67
|
+
- "requirement.md"
|
|
68
|
+
- "多系统需求分析"
|
|
69
|
+
- "系统间契约定义"
|
|
70
|
+
successCriteria:
|
|
71
|
+
- "多系统识别完整"
|
|
72
|
+
- "系统间契约清晰"
|
|
73
|
+
qualityMetrics:
|
|
74
|
+
- "系统识别率 = 100%"
|
|
75
|
+
- "契约完整率 >= 90%"
|
|
76
|
+
maxDuration: 1200
|
|
77
|
+
|
|
78
|
+
- name: "ambiguous-requirement"
|
|
79
|
+
input:
|
|
80
|
+
context: "做一个好用的后台管理系统"
|
|
81
|
+
complexity: 3
|
|
82
|
+
expectedBehaviors:
|
|
83
|
+
- "识别需求歧义"
|
|
84
|
+
- "执行结构化澄清"
|
|
85
|
+
- "每个问题附 AI 推荐答案"
|
|
86
|
+
- "支持用户提前终止"
|
|
87
|
+
successCriteria:
|
|
88
|
+
- "歧义识别完整"
|
|
89
|
+
- "澄清问题 <= 5 个"
|
|
90
|
+
- "每个问题有推荐答案"
|
|
91
|
+
qualityMetrics:
|
|
92
|
+
- "歧义识别率 >= 90%"
|
|
93
|
+
- "推荐答案合理率 >= 90%"
|
|
94
|
+
maxDuration: 600
|
|
95
|
+
|
|
96
|
+
- name: "early-termination"
|
|
97
|
+
input:
|
|
98
|
+
context: "实现 {feature} 功能"
|
|
99
|
+
earlyTermination: true
|
|
100
|
+
complexity: 1
|
|
101
|
+
expectedBehaviors:
|
|
102
|
+
- "执行第 1 轮澄清"
|
|
103
|
+
- "正确识别用户终止意图"
|
|
104
|
+
- "提前终止澄清流程"
|
|
105
|
+
successCriteria:
|
|
106
|
+
- "终止识别准确"
|
|
107
|
+
- "需求文档完整"
|
|
108
|
+
qualityMetrics:
|
|
109
|
+
- "终止识别准确率 = 100%"
|
|
110
|
+
maxDuration: 300
|
|
111
|
+
|
|
112
|
+
successCriteria:
|
|
113
|
+
passRate: 85
|
|
114
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Field-Evolve-Analyzer
|
|
2
|
+
# 适用于: 测试 field-evolve-analyzer skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-field-evolve-analyzer"
|
|
7
|
+
description: "Field-Evolve-Analyzer Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: field-evolve-analyzer
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "problem-analysis"
|
|
14
|
+
input:
|
|
15
|
+
context: "分析测试失败的问题"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取测试结果"
|
|
19
|
+
- "识别失败模式"
|
|
20
|
+
- "分析根因"
|
|
21
|
+
- "评估影响范围"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "问题分析报告"
|
|
24
|
+
- "根因分析"
|
|
25
|
+
- "影响评估"
|
|
26
|
+
successCriteria:
|
|
27
|
+
- "失败模式识别准确"
|
|
28
|
+
- "根因分析深入"
|
|
29
|
+
qualityMetrics:
|
|
30
|
+
- "根因识别率 >= 85%"
|
|
31
|
+
maxDuration: 300
|
|
32
|
+
|
|
33
|
+
- name: "priority-calculation"
|
|
34
|
+
input:
|
|
35
|
+
context: "计算问题修复优先级"
|
|
36
|
+
complexity: 1
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "评估影响程度"
|
|
39
|
+
- "评估修复难度"
|
|
40
|
+
- "计算优先级分数"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "优先级排序列表"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "优先级计算合理"
|
|
45
|
+
qualityMetrics:
|
|
46
|
+
- "优先级合理性 >= 90%"
|
|
47
|
+
maxDuration: 120
|
|
48
|
+
|
|
49
|
+
successCriteria:
|
|
50
|
+
passRate: 85
|
|
51
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Field-Evolve-Distiller
|
|
2
|
+
# 适用于: 测试 field-evolve-distiller skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-field-evolve-distiller"
|
|
7
|
+
description: "Field-Evolve-Distiller Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: field-evolve-distiller
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "skill-distillation"
|
|
14
|
+
input:
|
|
15
|
+
context: "从实践日志提炼技能"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取进化规则"
|
|
19
|
+
- "收集 practice-log"
|
|
20
|
+
- "执行技能蒸馏"
|
|
21
|
+
- "验证进化效果"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "distill-report.md"
|
|
24
|
+
- "distilled-skills/"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "practice-log 完整读取"
|
|
27
|
+
- "技能提炼符合规范"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "技能规范率 >= 90%"
|
|
30
|
+
maxDuration: 600
|
|
31
|
+
|
|
32
|
+
successCriteria:
|
|
33
|
+
passRate: 85
|
|
34
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Field-Evolve-Executor
|
|
2
|
+
# 适用于: 测试 field-evolve-executor skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-field-evolve-executor"
|
|
7
|
+
description: "Field-Evolve-Executor Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: field-evolve-executor
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "benchmark-execution"
|
|
14
|
+
input:
|
|
15
|
+
context: "执行 benchmark 测试"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取 benchmark YAML"
|
|
19
|
+
- "创建临时目录"
|
|
20
|
+
- "执行完整流程"
|
|
21
|
+
- "捕获详细指标"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "执行结果"
|
|
24
|
+
- "指标数据"
|
|
25
|
+
- "产出物"
|
|
26
|
+
successCriteria:
|
|
27
|
+
- "benchmark 执行完整"
|
|
28
|
+
- "指标捕获准确"
|
|
29
|
+
qualityMetrics:
|
|
30
|
+
- "benchmark 执行率 = 100%"
|
|
31
|
+
maxDuration: 900
|
|
32
|
+
|
|
33
|
+
- name: "isolated-execution"
|
|
34
|
+
input:
|
|
35
|
+
context: "在隔离环境中执行测试"
|
|
36
|
+
complexity: 5
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "创建临时 git worktree"
|
|
39
|
+
- "执行测试"
|
|
40
|
+
- "清理环境"
|
|
41
|
+
successCriteria:
|
|
42
|
+
- "隔离环境创建成功"
|
|
43
|
+
- "清理完整"
|
|
44
|
+
qualityMetrics:
|
|
45
|
+
- "隔离成功率 = 100%"
|
|
46
|
+
maxDuration: 600
|
|
47
|
+
|
|
48
|
+
successCriteria:
|
|
49
|
+
passRate: 85
|
|
50
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Field-Evolve-Fixer
|
|
2
|
+
# 适用于: 测试 field-evolve-fixer skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-field-evolve-fixer"
|
|
7
|
+
description: "Field-Evolve-Fixer Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: field-evolve-fixer
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "auto-fix"
|
|
14
|
+
input:
|
|
15
|
+
context: "自动修复检测到的问题"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取问题清单"
|
|
19
|
+
- "分类可自动修复问题"
|
|
20
|
+
- "执行修复"
|
|
21
|
+
- "验证修复效果"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "修复记录"
|
|
24
|
+
- "验证报告"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "修复有效"
|
|
27
|
+
- "无退化发生"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "修复有效率 >= 85%"
|
|
30
|
+
- "退化检出率 = 100%"
|
|
31
|
+
maxDuration: 600
|
|
32
|
+
|
|
33
|
+
- name: "manual-review-required"
|
|
34
|
+
input:
|
|
35
|
+
context: "处理需要人工审查的问题"
|
|
36
|
+
complexity: 1
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "识别需人工审查问题"
|
|
39
|
+
- "生成审查建议"
|
|
40
|
+
- "不自动执行"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "审查建议清单"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "分类正确"
|
|
45
|
+
- "不自动执行"
|
|
46
|
+
qualityMetrics:
|
|
47
|
+
- "分类准确率 = 100%"
|
|
48
|
+
maxDuration: 120
|
|
49
|
+
|
|
50
|
+
successCriteria:
|
|
51
|
+
passRate: 85
|
|
52
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Field-Evolve-Learner
|
|
2
|
+
# 适用于: 测试 field-evolve-learner skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-field-evolve-learner"
|
|
7
|
+
description: "Field-Evolve-Learner Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: field-evolve-learner
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "pattern-learning"
|
|
14
|
+
input:
|
|
15
|
+
context: "从实践日志中学习模式"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取 practice-log"
|
|
19
|
+
- "识别重复模式"
|
|
20
|
+
- "提取通用解决方案"
|
|
21
|
+
expectedOutput:
|
|
22
|
+
- "模式识别报告"
|
|
23
|
+
- "通用解决方案"
|
|
24
|
+
successCriteria:
|
|
25
|
+
- "模式识别准确"
|
|
26
|
+
- "解决方案合理"
|
|
27
|
+
qualityMetrics:
|
|
28
|
+
- "模式识别率 >= 80%"
|
|
29
|
+
maxDuration: 300
|
|
30
|
+
|
|
31
|
+
successCriteria:
|
|
32
|
+
passRate: 80
|
|
33
|
+
avgFieldCompletion: 85
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Field-Evolve-Scanner
|
|
2
|
+
# 适用于: 测试 field-evolve-scanner skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-field-evolve-scanner"
|
|
7
|
+
description: "Field-Evolve-Scanner Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: field-evolve-scanner
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "simple-project-scan"
|
|
14
|
+
input:
|
|
15
|
+
context: "扫描一个简单的 Node.js 项目"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "检测技术栈"
|
|
19
|
+
- "检测项目结构"
|
|
20
|
+
- "识别代码复杂度问题"
|
|
21
|
+
- "输出问题清单"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "项目特征报告"
|
|
24
|
+
- "问题清单"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "技术栈检测正确"
|
|
27
|
+
- "问题识别完整"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "技术栈识别准确率 >= 95%"
|
|
30
|
+
- "问题识别率 >= 80%"
|
|
31
|
+
maxDuration: 300
|
|
32
|
+
|
|
33
|
+
- name: "multi-system-scan"
|
|
34
|
+
input:
|
|
35
|
+
context: "扫描包含前后端的多系统项目"
|
|
36
|
+
complexity: 5
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "检测多个子系统"
|
|
39
|
+
- "识别系统边界"
|
|
40
|
+
- "分析依赖关系"
|
|
41
|
+
- "启用兜底扫描分类文档"
|
|
42
|
+
expectedOutput:
|
|
43
|
+
- "多系统特征报告"
|
|
44
|
+
- "文档分类报告"
|
|
45
|
+
successCriteria:
|
|
46
|
+
- "子系统识别完整"
|
|
47
|
+
- "文档分类准确"
|
|
48
|
+
qualityMetrics:
|
|
49
|
+
- "子系统识别率 = 100%"
|
|
50
|
+
- "文档分类准确率 >= 90%"
|
|
51
|
+
maxDuration: 600
|
|
52
|
+
|
|
53
|
+
- name: "fallback-classification"
|
|
54
|
+
input:
|
|
55
|
+
context: "扫描项目并分类未匹配规则的文档"
|
|
56
|
+
complexity: 3
|
|
57
|
+
expectedBehaviors:
|
|
58
|
+
- "收集所有 Markdown 文件"
|
|
59
|
+
- "排除已规则匹配的文件"
|
|
60
|
+
- "使用 AI 模型分类"
|
|
61
|
+
expectedOutput:
|
|
62
|
+
- "设计文档列表"
|
|
63
|
+
- "业务文档列表"
|
|
64
|
+
- "测试文档列表"
|
|
65
|
+
successCriteria:
|
|
66
|
+
- "分类逻辑正确"
|
|
67
|
+
- "分类结果合理"
|
|
68
|
+
qualityMetrics:
|
|
69
|
+
- "分类准确率 >= 85%"
|
|
70
|
+
maxDuration: 450
|
|
71
|
+
|
|
72
|
+
successCriteria:
|
|
73
|
+
passRate: 85
|
|
74
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Field-Evolve
|
|
2
|
+
# 适用于: 测试 field-evolve skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-field-evolve"
|
|
7
|
+
description: "Field-Evolve Skill 基准测试 - 实战项目自进化"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: field-evolve
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "quick-validation"
|
|
14
|
+
input:
|
|
15
|
+
context: "快速验证模式"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取 config.json"
|
|
19
|
+
- "执行编译检查"
|
|
20
|
+
- "执行单元测试"
|
|
21
|
+
- "执行 Lint 检查"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "test-result.json"
|
|
24
|
+
- "quick-report.md"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "所有检查项执行"
|
|
27
|
+
- "结果记录完整"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "检查执行率 = 100%"
|
|
30
|
+
maxDuration: 300
|
|
31
|
+
|
|
32
|
+
- name: "deep-testing"
|
|
33
|
+
input:
|
|
34
|
+
context: "深度测试模式"
|
|
35
|
+
complexity: 5
|
|
36
|
+
expectedBehaviors:
|
|
37
|
+
- "扫描 benchmarks"
|
|
38
|
+
- "执行 benchmark 测试场景"
|
|
39
|
+
- "计算三维度评分"
|
|
40
|
+
expectedOutput:
|
|
41
|
+
- "deep-report.md"
|
|
42
|
+
- "evaluation.json"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "benchmark 执行完整"
|
|
45
|
+
- "评分计算正确"
|
|
46
|
+
qualityMetrics:
|
|
47
|
+
- "benchmark 执行率 = 100%"
|
|
48
|
+
maxDuration: 1800
|
|
49
|
+
|
|
50
|
+
- name: "full-cycle"
|
|
51
|
+
input:
|
|
52
|
+
context: "完整循环模式"
|
|
53
|
+
complexity: 5
|
|
54
|
+
expectedBehaviors:
|
|
55
|
+
- "执行深度测试"
|
|
56
|
+
- "生成改进方案"
|
|
57
|
+
- "执行自动修复"
|
|
58
|
+
- "技能蒸馏"
|
|
59
|
+
expectedOutput:
|
|
60
|
+
- "full-report.md"
|
|
61
|
+
- "distilled-skills/"
|
|
62
|
+
successCriteria:
|
|
63
|
+
- "完整循环执行成功"
|
|
64
|
+
- "无退化发生"
|
|
65
|
+
qualityMetrics:
|
|
66
|
+
- "修复有效率 >= 85%"
|
|
67
|
+
maxDuration: 2400
|
|
68
|
+
|
|
69
|
+
successCriteria:
|
|
70
|
+
passRate: 85
|
|
71
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Field-Evolve-Verifier
|
|
2
|
+
# 适用于: 测试 field-evolve-verifier skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-field-evolve-verifier"
|
|
7
|
+
description: "Field-Evolve-Verifier Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: field-evolve-verifier
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "fix-verification"
|
|
14
|
+
input:
|
|
15
|
+
context: "验证修复效果"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "重新执行失败测试"
|
|
19
|
+
- "对比修复前后"
|
|
20
|
+
- "检测退化"
|
|
21
|
+
- "计算质量分数"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "验证报告"
|
|
24
|
+
- "质量分数对比"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "验证执行完整"
|
|
27
|
+
- "退化检测正确"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "验证完整率 = 100%"
|
|
30
|
+
- "退化检出率 = 100%"
|
|
31
|
+
maxDuration: 450
|
|
32
|
+
|
|
33
|
+
- name: "benchmark-comparison"
|
|
34
|
+
input:
|
|
35
|
+
context: "对比 benchmark 结果"
|
|
36
|
+
complexity: 3
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "执行 benchmark"
|
|
39
|
+
- "对比历史结果"
|
|
40
|
+
- "识别退化"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "对比报告"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "对比准确"
|
|
45
|
+
qualityMetrics:
|
|
46
|
+
- "对比准确率 = 100%"
|
|
47
|
+
maxDuration: 600
|
|
48
|
+
|
|
49
|
+
successCriteria:
|
|
50
|
+
passRate: 90
|
|
51
|
+
avgFieldCompletion: 95
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Hotfix-Iteration
|
|
2
|
+
# 适用于: 测试 hotfix-iteration skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-hotfix-iteration"
|
|
7
|
+
description: "Hotfix-Iteration Skill 基准测试 - 热修复快速流程"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: hotfix-iteration
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "simple-hotfix"
|
|
14
|
+
input:
|
|
15
|
+
context: "修复生产环境 {bug-description}"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "问题诊断"
|
|
19
|
+
- "风险评估"
|
|
20
|
+
- "快速修复"
|
|
21
|
+
- "快速审查"
|
|
22
|
+
- "快速部署"
|
|
23
|
+
expectedOutput:
|
|
24
|
+
- "hotfix-issue.md"
|
|
25
|
+
- "变更代码"
|
|
26
|
+
- "回归测试"
|
|
27
|
+
successCriteria:
|
|
28
|
+
- "问题定位准确"
|
|
29
|
+
- "变更最小化"
|
|
30
|
+
- "回归测试通过"
|
|
31
|
+
qualityMetrics:
|
|
32
|
+
- "修复耗时 < 15 分钟"
|
|
33
|
+
- "变更行数 < 50"
|
|
34
|
+
maxDuration: 900
|
|
35
|
+
|
|
36
|
+
- name: "critical-hotfix"
|
|
37
|
+
input:
|
|
38
|
+
context: "修复生产环境严重安全漏洞"
|
|
39
|
+
complexity: 3
|
|
40
|
+
expectedBehaviors:
|
|
41
|
+
- "安全评估"
|
|
42
|
+
- "影响范围分析"
|
|
43
|
+
- "紧急修复"
|
|
44
|
+
- "安全审查"
|
|
45
|
+
successCriteria:
|
|
46
|
+
- "安全风险识别完整"
|
|
47
|
+
- "修复方案合理"
|
|
48
|
+
qualityMetrics:
|
|
49
|
+
- "修复耗时 < 30 分钟"
|
|
50
|
+
maxDuration: 1800
|
|
51
|
+
|
|
52
|
+
successCriteria:
|
|
53
|
+
passRate: 95
|
|
54
|
+
avgFieldCompletion: 95
|