@chongyan/autospec 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.en.md +447 -321
- package/README.md +418 -286
- package/knowledge/01-principles/00-principles-hierarchy.md +247 -0
- package/knowledge/01-principles/01-first-principles.md +241 -0
- package/knowledge/01-principles/02-strategic-principles.md +286 -0
- package/knowledge/01-principles/03-tactical-principles.md +385 -0
- package/knowledge/01-principles/04-operational-principles.md +275 -0
- package/knowledge/01-principles/05-domain-principles.md +539 -0
- package/knowledge/01-principles/06-methodology-principles.md +281 -0
- package/knowledge/01-principles/07-cognitive-principles.md +277 -0
- package/knowledge/01-principles/08-auto-fix-principles.md +320 -0
- package/knowledge/01-principles/09-constitution.md +220 -0
- package/knowledge/{principles/evolution.md → 01-principles/10-evolution-mechanism.md} +160 -14
- package/knowledge/01-principles/README.en.md +385 -0
- package/knowledge/01-principles/README.md +385 -0
- package/knowledge/{process/overview.md → 02-process/00-overview.md} +90 -5
- package/knowledge/02-process/README.en.md +143 -0
- package/knowledge/02-process/README.md +186 -0
- package/knowledge/{guides/support/pipeline-protocol.md → 03-guides/00-pipeline-protocol.md} +10 -10
- package/knowledge/{guides/support/team-orchestrator.md → 03-guides/01-team-orchestrator.md} +53 -8
- package/knowledge/{guides/stages/requirement-analyzer.md → 03-guides/02-analyze-requirement.md} +3 -3
- package/knowledge/{guides/stages/ai-effect-evaluator.md → 03-guides/08-evaluate-ai-effect.md} +14 -7
- package/knowledge/{guides/support/skill-distiller.md → 03-guides/19-distill-skill.md} +3 -3
- package/knowledge/{guides/support/skill-updater.md → 03-guides/20-update-skill.md} +1 -1
- package/knowledge/{guides/support/methodology-extractor.md → 03-guides/22-extract-methodology.md} +2 -2
- package/knowledge/{guides/support/complexity-assessor.md → 03-guides/24-assess-complexity.md} +6 -4
- package/knowledge/{guides/support/tech-stack-analyzer.md → 03-guides/26-analyze-tech-stack.md} +1 -1
- package/knowledge/{guides/domain-driven-design.md → 03-guides/42-apply-ddd.md} +1 -1
- package/knowledge/{process/ai-sdlc.md → 03-guides/43-run-ai-sdlc.md} +1 -1
- package/knowledge/{guides/knowledge-management.md → 03-guides/44-manage-knowledge.md} +4 -4
- package/knowledge/03-guides/README.en.md +212 -0
- package/knowledge/03-guides/README.md +212 -0
- package/knowledge/{checklists/requirement.md → 04-checklists/00-requirement.md} +1 -1
- package/knowledge/{checklists/design.md → 04-checklists/01-design.md} +1 -1
- package/knowledge/{checklists/code.md → 04-checklists/02-code.md} +16 -1
- package/knowledge/{checklists/release.md → 04-checklists/04-release.md} +1 -1
- package/knowledge/04-checklists/README.en.md +119 -0
- package/knowledge/04-checklists/README.md +123 -0
- package/knowledge/{config/validation-patterns.yaml → 05-config/00-validation-patterns.yaml} +1 -1
- package/knowledge/{config/team-tasks.yaml → 05-config/02-team-tasks.yaml} +2 -2
- package/knowledge/05-config/03-role-composition.yaml +346 -0
- package/knowledge/{config/skill-compositions.yaml → 05-config/05-skill-compositions.yaml} +24 -24
- package/knowledge/05-config/README.en.md +54 -0
- package/knowledge/05-config/README.md +132 -0
- package/knowledge/06-environment/00-template-registry.md +310 -0
- package/knowledge/06-environment/01-detection-patterns.yaml +1692 -0
- package/knowledge/{environment → 06-environment}/README.en.md +4 -0
- package/knowledge/{environment → 06-environment}/README.md +66 -25
- package/knowledge/{standards/coding-style.md → 07-standards/00-coding-style.md} +123 -4
- package/knowledge/{standards/code-review.md → 07-standards/01-code-review.md} +3 -3
- package/knowledge/{standards/data-consistency.md → 07-standards/02-data-consistency.md} +1 -1
- package/knowledge/{standards/document-versioning.md → 07-standards/03-document-versioning.md} +1 -1
- package/knowledge/{standards/risk-detection.md → 07-standards/04-risk-detection.md} +5 -5
- package/knowledge/07-standards/README.en.md +119 -0
- package/knowledge/07-standards/README.md +123 -0
- package/knowledge/08-organization/00-vision-mission.md +113 -0
- package/knowledge/{organization/ai-native-team.md → 08-organization/01-ai-native-culture.md} +1 -1
- package/knowledge/{organization/team-metrics.md → 08-organization/02-team-metrics.md} +1 -1
- package/knowledge/08-organization/03-committee-structure.md +54 -0
- package/knowledge/08-organization/04-governance-metrics.md +55 -0
- package/knowledge/08-organization/05-improvement-process.md +71 -0
- package/knowledge/08-organization/README.en.md +165 -0
- package/knowledge/08-organization/README.md +165 -0
- package/knowledge/09-templates/00-requirement-proposal.md +344 -0
- package/knowledge/09-templates/01-architecture-design.md +494 -0
- package/knowledge/09-templates/02-api-design.md +408 -0
- package/knowledge/09-templates/03-database-design.md +313 -0
- package/knowledge/09-templates/04-product-design.md +237 -0
- package/knowledge/09-templates/05-domain-business.md +388 -0
- package/knowledge/09-templates/06-test-design.md +268 -0
- package/knowledge/09-templates/07-evaluation-design.md +372 -0
- package/knowledge/09-templates/08-component-knowledge.md +272 -0
- package/knowledge/09-templates/09-best-practices.md +218 -0
- package/knowledge/{environment/middleware-knowledge.md → 09-templates/10-middleware-knowledge.md} +106 -1
- package/knowledge/09-templates/README.en.md +222 -0
- package/knowledge/09-templates/README.md +216 -0
- package/knowledge/README.en.md +372 -0
- package/knowledge/README.md +354 -99
- package/package.json +1 -1
- package/plugins/.claude-plugin/plugin.json +460 -81
- package/plugins/agents/roles/ceo.md +1 -1
- package/plugins/agents/roles/product-owner.md +1 -1
- package/plugins/agents/roles/tech-lead.md +1 -1
- package/plugins/agents/support/consistency-checker.md +36 -3
- package/plugins/agents/support/monitoring-agent.md +215 -0
- package/plugins/agents/support/safety-auditor.md +2 -2
- package/plugins/agents/support/stage-gate-evaluator.md +95 -11
- package/plugins/agents/support/test-coverage-reviewer.md +1 -1
- package/plugins/benchmarks/templates/README.md +165 -13
- package/plugins/benchmarks/templates/commands/apply-template.yaml +108 -0
- package/plugins/benchmarks/templates/commands/archive-template.yaml +65 -0
- package/plugins/benchmarks/templates/commands/env-export-template.yaml +64 -0
- package/plugins/benchmarks/templates/commands/env-sync-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/env-template-template.yaml +96 -0
- package/plugins/benchmarks/templates/commands/env-template.yaml +58 -0
- package/plugins/benchmarks/templates/commands/env-update-template.yaml +110 -0
- package/plugins/benchmarks/templates/commands/env-validate-template.yaml +95 -0
- package/plugins/benchmarks/templates/commands/field-evolve-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/project-evolve-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/propose-template.yaml +88 -0
- package/plugins/benchmarks/templates/commands/review-template.yaml +124 -0
- package/plugins/benchmarks/templates/commands/run-template.yaml +127 -0
- package/plugins/benchmarks/templates/commands/test-template.yaml +149 -0
- package/plugins/benchmarks/templates/pipeline/experiment-template.yaml +92 -0
- package/plugins/benchmarks/templates/pipeline/hotfix-template.yaml +81 -0
- package/plugins/benchmarks/templates/skills/agile-iteration-template.yaml +78 -0
- package/plugins/benchmarks/templates/skills/benchmark-executor-template.yaml +114 -0
- package/plugins/benchmarks/templates/skills/benchmark-generator-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/delivery-stage-template.yaml +130 -0
- package/plugins/benchmarks/templates/skills/design-stage-template.yaml +131 -0
- package/plugins/benchmarks/templates/skills/experiment-iteration-template.yaml +60 -0
- package/plugins/benchmarks/templates/skills/exploration-phase-template.yaml +114 -0
- package/plugins/benchmarks/templates/skills/field-evolve-analyzer-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/field-evolve-distiller-template.yaml +34 -0
- package/plugins/benchmarks/templates/skills/field-evolve-executor-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/field-evolve-fixer-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/field-evolve-learner-template.yaml +33 -0
- package/plugins/benchmarks/templates/skills/field-evolve-scanner-template.yaml +74 -0
- package/plugins/benchmarks/templates/skills/field-evolve-template.yaml +71 -0
- package/plugins/benchmarks/templates/skills/field-evolve-verifier-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/hotfix-iteration-template.yaml +54 -0
- package/plugins/benchmarks/templates/skills/implementation-stage-template.yaml +127 -0
- package/plugins/benchmarks/templates/skills/layer1-validation-template.yaml +121 -0
- package/plugins/benchmarks/templates/skills/project-evolve-analyzer-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/project-evolve-fixer-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/project-evolve-generator-template.yaml +34 -0
- package/plugins/benchmarks/templates/skills/project-evolve-learner-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/project-evolve-reviewer-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/project-evolve-scanner-template.yaml +75 -0
- package/plugins/benchmarks/templates/skills/project-evolve-template.yaml +72 -0
- package/plugins/benchmarks/templates/skills/project-evolve-verifier-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/skill-forge-template.yaml +117 -0
- package/plugins/benchmarks/templates/skills/startup-guard-template.yaml +103 -0
- package/plugins/benchmarks/templates/skills/testing-stage-template.yaml +146 -0
- package/plugins/benchmarks/templates/skills/waterfall-iteration-template.yaml +55 -0
- package/plugins/commands/README.en.md +2 -2
- package/plugins/commands/README.md +2 -2
- package/plugins/commands/apply.md +102 -16
- package/plugins/commands/archive.md +60 -4
- package/plugins/commands/env-sync.md +1047 -406
- package/plugins/commands/env-template.md +11 -135
- package/plugins/commands/env-update.md +1 -1
- package/plugins/commands/env-validate.md +3 -3
- package/plugins/commands/explore.md +118 -1
- package/plugins/commands/field-evolve.md +51 -175
- package/plugins/commands/project-evolve.md +167 -68
- package/plugins/commands/propose.md +97 -6
- package/plugins/commands/review.md +5 -5
- package/plugins/commands/run.md +841 -13
- package/plugins/commands/status.md +138 -17
- package/plugins/commands/test.md +389 -0
- package/plugins/hooks/constitution-guard.js +1 -1
- package/plugins/hooks/environment-autocommit.js +366 -24
- package/plugins/hooks/environment-manager.js +3 -2
- package/plugins/hooks/execution-tracker.js +109 -4
- package/plugins/hooks/layer1-validator.js +117 -1
- package/plugins/hooks/lib/auto-fix-loop.js +605 -0
- package/plugins/hooks/lib/environment-config-loader.js +11 -7
- package/plugins/hooks/lib/hook-state-manager.js +98 -0
- package/plugins/hooks/lib/memory-extractor.js +27 -5
- package/plugins/hooks/lib/memory-manager.js +1 -1
- package/plugins/hooks/lib/test-auto-fix.test.js +194 -0
- package/plugins/hooks/monitoring-trigger.js +467 -0
- package/plugins/skills/README.en.md +15 -3
- package/plugins/skills/README.md +21 -11
- package/plugins/skills/agile-iteration/SKILL.md +187 -0
- package/plugins/skills/delivery-stage/SKILL.md +133 -12
- package/plugins/skills/design-stage/SKILL.md +103 -12
- package/plugins/skills/experiment-evaluator/SKILL.md +271 -0
- package/plugins/skills/experiment-iteration/SKILL.md +154 -0
- package/plugins/skills/exploration-phase/SKILL.md +93 -10
- package/plugins/skills/field-evolve-analyzer/SKILL.md +65 -0
- package/plugins/skills/field-evolve-distiller/SKILL.md +66 -0
- package/plugins/skills/field-evolve-executor/SKILL.md +94 -0
- package/plugins/skills/field-evolve-executor/executor.js +342 -0
- package/plugins/skills/field-evolve-fixer/SKILL.md +69 -0
- package/plugins/skills/field-evolve-learner/SKILL.md +65 -0
- package/plugins/skills/field-evolve-scanner/SKILL.md +87 -0
- package/plugins/skills/field-evolve-scanner/scripts/fallback-scanner.js +288 -0
- package/plugins/skills/field-evolve-verifier/SKILL.md +64 -0
- package/plugins/skills/hotfix-iteration/SKILL.md +279 -0
- package/plugins/skills/implementation-stage/SKILL.md +156 -15
- package/plugins/skills/layer1-validation/SKILL.md +1 -1
- package/plugins/skills/pending-dashboard/SKILL.md +9 -8
- package/plugins/skills/project-evolve-analyzer/SKILL.md +95 -0
- package/plugins/skills/project-evolve-fixer/SKILL.md +99 -0
- package/plugins/skills/project-evolve-generator/SKILL.md +149 -0
- package/plugins/skills/project-evolve-learner/SKILL.md +103 -0
- package/plugins/skills/project-evolve-reviewer/SKILL.md +104 -0
- package/plugins/skills/project-evolve-scanner/SKILL.md +95 -0
- package/plugins/skills/project-evolve-scanner/scripts/dependency-reuse-checker.js +395 -0
- package/plugins/skills/project-evolve-scanner/scripts/subsystem-coverage.js +315 -0
- package/plugins/skills/project-evolve-verifier/SKILL.md +105 -0
- package/plugins/skills/requirement-stage/SKILL.md +47 -13
- package/plugins/skills/skill-forge/SKILL.md +2 -2
- package/plugins/skills/testing-stage/SKILL.md +583 -8
- package/plugins/skills/waterfall-iteration/SKILL.md +115 -0
- package/scripts/cli/index.js +1 -1
- package/scripts/cli/init.js +30 -4
- package/scripts/cli/list.js +3 -2
- package/scripts/config/commands.config.js +8 -8
- package/scripts/config/hooks.config.js +1 -1
- package/scripts/install/constants.js +204 -165
- package/scripts/state.js +210 -1
- package/knowledge/config/README.en.md +0 -44
- package/knowledge/config/README.md +0 -44
- package/knowledge/config/role-composition.yaml +0 -98
- package/knowledge/config/team-triggers.yaml +0 -198
- package/knowledge/domain/README.md +0 -115
- package/knowledge/domain/flows/README.md +0 -194
- package/knowledge/domain/glossary.md +0 -143
- package/knowledge/domain/rules.md +0 -138
- package/knowledge/environment/component-knowledge.md +0 -316
- package/knowledge/environment/detection-patterns.yaml +0 -502
- package/knowledge/environment/template-registry.md +0 -321
- package/knowledge/guides/requirement-engineering.md +0 -329
- package/knowledge/guides/system-design.md +0 -352
- package/knowledge/principles/constitution.md +0 -134
- package/knowledge/principles/core-principles.md +0 -368
- package/knowledge/principles/design-philosophy.md +0 -877
- package/knowledge/process/README.en.md +0 -38
- package/knowledge/process/README.md +0 -48
- package/knowledge/templates/ai-evaluation.md +0 -150
- package/knowledge/templates/api-design.md +0 -117
- package/knowledge/templates/database-design.md +0 -132
- package/knowledge/templates/domain-driven-design.md +0 -321
- package/knowledge/templates/product-proposal.md +0 -201
- package/knowledge/templates/system-design.md +0 -227
- package/knowledge/templates/task-breakdown.md +0 -107
- package/knowledge/templates/test-case.md +0 -170
- package/plugins/commands/validate.md +0 -108
- package/plugins/skills/benchmark-executor/README.md +0 -93
- package/plugins/skills/evolution-process/SKILL.md +0 -291
- package/plugins/skills/project-evolution/SKILL.md +0 -847
- package/scripts/evolution/evolution-router.js +0 -273
- package/scripts/evolution/evolution-signal-collector.js +0 -307
- package/scripts/evolution/knowledge-loader.js +0 -346
- package/scripts/evolution/marketplace.js +0 -317
- package/scripts/evolution/version-manager.js +0 -371
- /package/knowledge/{process → 02-process}/01-requirement.md +0 -0
- /package/knowledge/{process → 02-process}/02-design.md +0 -0
- /package/knowledge/{process → 02-process}/03-implementation.md +0 -0
- /package/knowledge/{process → 02-process}/04-review.md +0 -0
- /package/knowledge/{process → 02-process}/05-testing.md +0 -0
- /package/knowledge/{process → 02-process}/06-delivery.md +0 -0
- /package/knowledge/{guides/stages/design-planner.md → 03-guides/03-design-solution.md} +0 -0
- /package/knowledge/{guides/stages/code-implementer.md → 03-guides/04-implement-code.md} +0 -0
- /package/knowledge/{guides/stages/test-planner.md → 03-guides/05-plan-testing.md} +0 -0
- /package/knowledge/{guides/stages/test-generator.md → 03-guides/06-generate-tests.md} +0 -0
- /package/knowledge/{guides/stages/release-checker.md → 03-guides/07-check-release.md} +0 -0
- /package/knowledge/{guides/stages/requirement-reviewer.md → 03-guides/09-review-requirement.md} +0 -0
- /package/knowledge/{guides/stages/design-reviewer.md → 03-guides/10-review-design.md} +0 -0
- /package/knowledge/{guides/stages/code-reviewer.md → 03-guides/11-review-code.md} +0 -0
- /package/knowledge/{guides/stages/test-reviewer.md → 03-guides/12-review-testing.md} +0 -0
- /package/knowledge/{guides/stages/security-reviewer.md → 03-guides/13-audit-security.md} +0 -0
- /package/knowledge/{guides/stages/consistency-checker.md → 03-guides/14-check-consistency.md} +0 -0
- /package/knowledge/{guides/stages/unit-test-runner.md → 03-guides/15-run-unit-tests.md} +0 -0
- /package/knowledge/{guides/stages/integration-test-runner.md → 03-guides/16-run-integration-tests.md} +0 -0
- /package/knowledge/{guides/stages/test-context-analyzer.md → 03-guides/17-analyze-test-context.md} +0 -0
- /package/knowledge/{guides/support/practice-logger.md → 03-guides/18-log-practice.md} +0 -0
- /package/knowledge/{guides/support/skill-validator.md → 03-guides/21-validate-skill.md} +0 -0
- /package/knowledge/{guides/support/scope-inference.md → 03-guides/23-infer-scope.md} +0 -0
- /package/knowledge/{guides/support/component-discovery.md → 03-guides/25-discover-component.md} +0 -0
- /package/knowledge/{guides/support/environment-scanner.md → 03-guides/27-scan-environment.md} +0 -0
- /package/knowledge/{guides/support/environment-validator.md → 03-guides/28-validate-environment.md} +0 -0
- /package/knowledge/{guides/support/knowledge-generator.md → 03-guides/29-generate-knowledge.md} +0 -0
- /package/knowledge/{guides/support/ai-capability-analyzer.md → 03-guides/30-analyze-ai-capability.md} +0 -0
- /package/knowledge/{guides/support/ai-component-analyzer.md → 03-guides/31-analyze-ai-component.md} +0 -0
- /package/knowledge/{guides/support/ai-agent-analyzer.md → 03-guides/32-analyze-ai-agent.md} +0 -0
- /package/knowledge/{guides/support/ai-rag-analyzer.md → 03-guides/33-analyze-ai-rag.md} +0 -0
- /package/knowledge/{guides/support/ai-task-assessor.md → 03-guides/34-assess-ai-task.md} +0 -0
- /package/knowledge/{guides/support/ai-pipeline-evaluator.md → 03-guides/35-evaluate-ai-pipeline.md} +0 -0
- /package/knowledge/{guides/support/ai-artifact-evaluator.md → 03-guides/36-evaluate-ai-artifact.md} +0 -0
- /package/knowledge/{guides/support/ai-evaluation-planner.md → 03-guides/37-plan-ai-evaluation.md} +0 -0
- /package/knowledge/{guides/support/ai-path-evaluator.md → 03-guides/38-evaluate-ai-path.md} +0 -0
- /package/knowledge/{guides/support/ai-data-validator.md → 03-guides/39-validate-ai-data.md} +0 -0
- /package/knowledge/{guides/support/ai-anomaly-analyzer.md → 03-guides/40-detect-ai-anomaly.md} +0 -0
- /package/knowledge/{guides/support/ai-test-diagnostics.md → 03-guides/41-diagnose-ai-test.md} +0 -0
- /package/knowledge/{guides/support/test-runner.md → 03-guides/45-test-runner.md} +0 -0
- /package/knowledge/{checklists/test.md → 04-checklists/03-test.md} +0 -0
- /package/knowledge/{config/team-stage.yaml → 05-config/01-team-stage.yaml} +0 -0
- /package/knowledge/{config/role-extensions.yaml → 05-config/04-role-extensions.yaml} +0 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# AutoSpec Command Benchmark Template - Field-Evolve
|
|
2
|
+
# 适用于: 测试 field-evolve 命令(实战项目自进化)
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "command-field-evolve"
|
|
7
|
+
description: "Field-Evolve 命令基准测试"
|
|
8
|
+
|
|
9
|
+
type: command
|
|
10
|
+
target: field-evolve
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "quick-mode"
|
|
14
|
+
input: "--mode=quick"
|
|
15
|
+
complexity: 1
|
|
16
|
+
expectedBehaviors:
|
|
17
|
+
- "读取 config.json"
|
|
18
|
+
- "执行编译检查"
|
|
19
|
+
- "执行单元测试"
|
|
20
|
+
- "执行 Lint 检查"
|
|
21
|
+
expectedOutput:
|
|
22
|
+
- "test-result.json"
|
|
23
|
+
- "quick-report.md"
|
|
24
|
+
successCriteria:
|
|
25
|
+
- "所有检查项执行"
|
|
26
|
+
- "测试结果记录完整"
|
|
27
|
+
qualityMetrics:
|
|
28
|
+
- "检查执行率 = 100%"
|
|
29
|
+
maxDuration: 300
|
|
30
|
+
|
|
31
|
+
- name: "deep-mode"
|
|
32
|
+
input: "--mode=deep"
|
|
33
|
+
complexity: 5
|
|
34
|
+
expectedBehaviors:
|
|
35
|
+
- "扫描 benchmarks"
|
|
36
|
+
- "执行 benchmark 测试场景"
|
|
37
|
+
- "计算三维度评分"
|
|
38
|
+
expectedOutput:
|
|
39
|
+
- "deep-report.md"
|
|
40
|
+
- "evaluation.json"
|
|
41
|
+
successCriteria:
|
|
42
|
+
- "benchmark 执行完整"
|
|
43
|
+
- "三维度评分计算正确"
|
|
44
|
+
qualityMetrics:
|
|
45
|
+
- "benchmark 执行率 = 100%"
|
|
46
|
+
maxDuration: 1800
|
|
47
|
+
|
|
48
|
+
- name: "full-cycle"
|
|
49
|
+
input: "--mode=full --auto-fix"
|
|
50
|
+
complexity: 5
|
|
51
|
+
expectedBehaviors:
|
|
52
|
+
- "执行深度测试"
|
|
53
|
+
- "生成改进方案"
|
|
54
|
+
- "执行自动修复"
|
|
55
|
+
- "效果验证"
|
|
56
|
+
- "技能蒸馏"
|
|
57
|
+
expectedOutput:
|
|
58
|
+
- "full-report.md"
|
|
59
|
+
- "distilled-skills/"
|
|
60
|
+
successCriteria:
|
|
61
|
+
- "完整循环执行成功"
|
|
62
|
+
- "无退化发生"
|
|
63
|
+
qualityMetrics:
|
|
64
|
+
- "修复有效率 >= 85%"
|
|
65
|
+
maxDuration: 2400
|
|
66
|
+
|
|
67
|
+
- name: "distill-mode"
|
|
68
|
+
input: "--mode=distill"
|
|
69
|
+
complexity: 3
|
|
70
|
+
expectedBehaviors:
|
|
71
|
+
- "读取进化规则"
|
|
72
|
+
- "收集 practice-log"
|
|
73
|
+
- "执行技能蒸馏"
|
|
74
|
+
- "验证进化效果"
|
|
75
|
+
expectedOutput:
|
|
76
|
+
- "distill-report.md"
|
|
77
|
+
- "distilled-skills/"
|
|
78
|
+
successCriteria:
|
|
79
|
+
- "practice-log 完整读取"
|
|
80
|
+
- "技能提炼符合规范"
|
|
81
|
+
qualityMetrics:
|
|
82
|
+
- "技能规范率 >= 90%"
|
|
83
|
+
maxDuration: 600
|
|
84
|
+
|
|
85
|
+
- name: "generate-mode"
|
|
86
|
+
input: "--mode=generate"
|
|
87
|
+
complexity: 1
|
|
88
|
+
expectedBehaviors:
|
|
89
|
+
- "检测项目类型"
|
|
90
|
+
- "生成基础 benchmark"
|
|
91
|
+
- "生成 pipeline benchmark"
|
|
92
|
+
expectedOutput:
|
|
93
|
+
- "project-generated.yaml"
|
|
94
|
+
- "generate-report.md"
|
|
95
|
+
successCriteria:
|
|
96
|
+
- "项目类型识别正确"
|
|
97
|
+
- "benchmark 结构完整"
|
|
98
|
+
qualityMetrics:
|
|
99
|
+
- "类型识别率 = 100%"
|
|
100
|
+
maxDuration: 180
|
|
101
|
+
|
|
102
|
+
successCriteria:
|
|
103
|
+
passRate: 85
|
|
104
|
+
avgDuration: 600
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# AutoSpec Command Benchmark Template - Project-Evolve
|
|
2
|
+
# 适用于: 测试 project-evolve 命令(AI Native 项目自进化)
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "command-project-evolve"
|
|
7
|
+
description: "Project-Evolve 命令基准测试"
|
|
8
|
+
|
|
9
|
+
type: command
|
|
10
|
+
target: project-evolve
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "init-memory"
|
|
14
|
+
input: "--init-memory"
|
|
15
|
+
complexity: 1
|
|
16
|
+
expectedBehaviors:
|
|
17
|
+
- "扫描项目技术栈"
|
|
18
|
+
- "检测项目结构"
|
|
19
|
+
- "自动发现评测点"
|
|
20
|
+
- "创建记忆目录结构"
|
|
21
|
+
expectedOutput:
|
|
22
|
+
- ".autospec/memory/index.yaml"
|
|
23
|
+
- "初始化报告"
|
|
24
|
+
successCriteria:
|
|
25
|
+
- "技术栈识别正确"
|
|
26
|
+
- "评测点发现 >= 10 个"
|
|
27
|
+
qualityMetrics:
|
|
28
|
+
- "记忆创建率 = 100%"
|
|
29
|
+
maxDuration: 300
|
|
30
|
+
|
|
31
|
+
- name: "cruise-mode"
|
|
32
|
+
input: "--cruise"
|
|
33
|
+
complexity: 3
|
|
34
|
+
expectedBehaviors:
|
|
35
|
+
- "加载 validated 记忆"
|
|
36
|
+
- "执行代码扫描"
|
|
37
|
+
- "智能优先级排序"
|
|
38
|
+
- "L1 自动修复"
|
|
39
|
+
expectedOutput:
|
|
40
|
+
- "cruise-report.md"
|
|
41
|
+
- "auto-fixes.json"
|
|
42
|
+
successCriteria:
|
|
43
|
+
- "巡航执行完整"
|
|
44
|
+
- "自动修复正确"
|
|
45
|
+
qualityMetrics:
|
|
46
|
+
- "自动修复准确率 >= 90%"
|
|
47
|
+
maxDuration: 600
|
|
48
|
+
|
|
49
|
+
- name: "deep-evolution"
|
|
50
|
+
input: "--deep"
|
|
51
|
+
complexity: 5
|
|
52
|
+
expectedBehaviors:
|
|
53
|
+
- "六层全维度扫描"
|
|
54
|
+
- "关联分析"
|
|
55
|
+
- "生成改进方案"
|
|
56
|
+
- "区分 auto-fixable/manual-review"
|
|
57
|
+
expectedOutput:
|
|
58
|
+
- "deep-report.md"
|
|
59
|
+
- "improvements.md"
|
|
60
|
+
successCriteria:
|
|
61
|
+
- "六层扫描完整"
|
|
62
|
+
- "改进方案可执行"
|
|
63
|
+
qualityMetrics:
|
|
64
|
+
- "维度覆盖率 = 100%"
|
|
65
|
+
maxDuration: 1200
|
|
66
|
+
|
|
67
|
+
- name: "multi-system-focus"
|
|
68
|
+
input: "--focus=multi"
|
|
69
|
+
complexity: 5
|
|
70
|
+
expectedBehaviors:
|
|
71
|
+
- "识别所有子系统"
|
|
72
|
+
- "检查接口契约一致性"
|
|
73
|
+
- "检测共享逻辑抽离机会"
|
|
74
|
+
- "检测依赖循环"
|
|
75
|
+
expectedOutput:
|
|
76
|
+
- "multi-system-report.md"
|
|
77
|
+
- "dependency-graph.md"
|
|
78
|
+
successCriteria:
|
|
79
|
+
- "子系统识别完整"
|
|
80
|
+
- "契约不一致问题检出"
|
|
81
|
+
qualityMetrics:
|
|
82
|
+
- "子系统识别率 = 100%"
|
|
83
|
+
maxDuration: 900
|
|
84
|
+
|
|
85
|
+
- name: "review-mode"
|
|
86
|
+
input: "--review --last=7d"
|
|
87
|
+
complexity: 3
|
|
88
|
+
expectedBehaviors:
|
|
89
|
+
- "读取历史执行记录"
|
|
90
|
+
- "统计指标变化趋势"
|
|
91
|
+
- "分析有效改进"
|
|
92
|
+
expectedOutput:
|
|
93
|
+
- "review-report.md"
|
|
94
|
+
- "metrics-trend.md"
|
|
95
|
+
successCriteria:
|
|
96
|
+
- "历史数据完整读取"
|
|
97
|
+
- "趋势分析准确"
|
|
98
|
+
qualityMetrics:
|
|
99
|
+
- "趋势分析准确率 >= 90%"
|
|
100
|
+
maxDuration: 450
|
|
101
|
+
|
|
102
|
+
successCriteria:
|
|
103
|
+
passRate: 85
|
|
104
|
+
avgDuration: 600
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# AutoSpec Command Benchmark Template - Propose
|
|
2
|
+
# 适用于: 测试 propose 命令(方案设计)
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "command-propose"
|
|
7
|
+
description: "Propose 命令基准测试"
|
|
8
|
+
|
|
9
|
+
type: command
|
|
10
|
+
target: propose
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "simple-design"
|
|
14
|
+
input: "设计 {feature-name} 功能的方案"
|
|
15
|
+
complexity: 1
|
|
16
|
+
expectedArtifacts:
|
|
17
|
+
- "specs/{feature}/design.md"
|
|
18
|
+
expectedBehaviors:
|
|
19
|
+
- "读取需求文档"
|
|
20
|
+
- "设计 API 接口"
|
|
21
|
+
- "设计数据库表"
|
|
22
|
+
successCriteria:
|
|
23
|
+
- "设计文档存在"
|
|
24
|
+
- "包含 API 设计"
|
|
25
|
+
- "包含数据库设计"
|
|
26
|
+
qualityMetrics:
|
|
27
|
+
- "API 设计完整率 >= 90%"
|
|
28
|
+
- "数据库设计合理率 >= 90%"
|
|
29
|
+
maxDuration: 300
|
|
30
|
+
|
|
31
|
+
- name: "multi-system-design"
|
|
32
|
+
input: "设计 {feature} 功能,包含 {subsystems}"
|
|
33
|
+
complexity: 5
|
|
34
|
+
expectedArtifacts:
|
|
35
|
+
- "specs/{feature}/design/overview.md"
|
|
36
|
+
- "specs/{feature}/design/backend.md"
|
|
37
|
+
- "specs/{feature}/design/frontend.md"
|
|
38
|
+
- "contracts/api.yaml"
|
|
39
|
+
expectedBehaviors:
|
|
40
|
+
- "设计整体架构"
|
|
41
|
+
- "各子系统独立设计"
|
|
42
|
+
- "定义系统间接口"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "整体架构清晰"
|
|
45
|
+
- "每个子系统有独立设计"
|
|
46
|
+
- "系统间接口定义完整"
|
|
47
|
+
qualityMetrics:
|
|
48
|
+
- "架构合理率 >= 90%"
|
|
49
|
+
- "接口完整率 >= 90%"
|
|
50
|
+
maxDuration: 600
|
|
51
|
+
|
|
52
|
+
- name: "ai-design"
|
|
53
|
+
input: "设计 AI 功能方案,使用 {model-type}"
|
|
54
|
+
complexity: 5
|
|
55
|
+
expectedArtifacts:
|
|
56
|
+
- "specs/{feature}/design.md"
|
|
57
|
+
- "specs/{feature}/evaluation.md"
|
|
58
|
+
expectedBehaviors:
|
|
59
|
+
- "设计模型选型"
|
|
60
|
+
- "设计训练流程"
|
|
61
|
+
- "设计效果评估方案"
|
|
62
|
+
successCriteria:
|
|
63
|
+
- "模型选型合理"
|
|
64
|
+
- "效果评估方案完整"
|
|
65
|
+
qualityMetrics:
|
|
66
|
+
- "技术选型合理率 >= 90%"
|
|
67
|
+
- "评估方案完整率 >= 90%"
|
|
68
|
+
maxDuration: 600
|
|
69
|
+
|
|
70
|
+
- name: "security-sensitive-design"
|
|
71
|
+
input: "设计支付系统方案"
|
|
72
|
+
complexity: 5
|
|
73
|
+
expectedBehaviors:
|
|
74
|
+
- "识别安全风险"
|
|
75
|
+
- "设计安全策略"
|
|
76
|
+
- "设计审计日志"
|
|
77
|
+
successCriteria:
|
|
78
|
+
- "安全风险识别完整"
|
|
79
|
+
- "安全策略合理"
|
|
80
|
+
- "审计日志设计完整"
|
|
81
|
+
qualityMetrics:
|
|
82
|
+
- "安全考虑完整率 >= 90%"
|
|
83
|
+
- "合规性 = 100%"
|
|
84
|
+
maxDuration: 600
|
|
85
|
+
|
|
86
|
+
successCriteria:
|
|
87
|
+
passRate: 90
|
|
88
|
+
avgDuration: 450
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# AutoSpec Command Benchmark Template - Review
|
|
2
|
+
# 适用于: 测试 review 命令(审查交付物)
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "command-review"
|
|
7
|
+
description: "Review 命令基准测试"
|
|
8
|
+
|
|
9
|
+
type: command
|
|
10
|
+
target: review
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "requirement-review"
|
|
14
|
+
input: "review requirement ./specs/{feature}/requirement.md"
|
|
15
|
+
complexity: 1
|
|
16
|
+
expectedOutput:
|
|
17
|
+
- "审查结论"
|
|
18
|
+
- "逐项判定表"
|
|
19
|
+
- "blocking 问题清单"
|
|
20
|
+
- "non-blocking 建议"
|
|
21
|
+
successCriteria:
|
|
22
|
+
- "审查报告结构完整"
|
|
23
|
+
- "逐项判定有证据支撑"
|
|
24
|
+
qualityMetrics:
|
|
25
|
+
- "问题检出率 >= 90%"
|
|
26
|
+
- "误报率 <= 10%"
|
|
27
|
+
maxDuration: 300
|
|
28
|
+
|
|
29
|
+
- name: "design-review"
|
|
30
|
+
input: "review design ./specs/{feature}/design.md"
|
|
31
|
+
complexity: 3
|
|
32
|
+
expectedOutput:
|
|
33
|
+
- "审查结论"
|
|
34
|
+
- "架构设计判定"
|
|
35
|
+
- "API 设计判定"
|
|
36
|
+
- "数据库设计判定"
|
|
37
|
+
- "安全考虑判定"
|
|
38
|
+
successCriteria:
|
|
39
|
+
- "各设计维度审查完整"
|
|
40
|
+
- "设计问题识别准确"
|
|
41
|
+
qualityMetrics:
|
|
42
|
+
- "设计问题检出率 >= 85%"
|
|
43
|
+
- "建议合理率 >= 90%"
|
|
44
|
+
maxDuration: 450
|
|
45
|
+
|
|
46
|
+
- name: "code-review"
|
|
47
|
+
input: "review code ./src/"
|
|
48
|
+
complexity: 3
|
|
49
|
+
expectedOutput:
|
|
50
|
+
- "审查结论"
|
|
51
|
+
- "代码质量评分"
|
|
52
|
+
- "安全问题检测"
|
|
53
|
+
- "性能问题检测"
|
|
54
|
+
successCriteria:
|
|
55
|
+
- "代码审查维度完整"
|
|
56
|
+
- "安全漏洞识别准确"
|
|
57
|
+
qualityMetrics:
|
|
58
|
+
- "代码问题检出率 >= 85%"
|
|
59
|
+
- "安全漏洞检出率 >= 90%"
|
|
60
|
+
maxDuration: 600
|
|
61
|
+
|
|
62
|
+
- name: "test-review"
|
|
63
|
+
input: "review test ./tests/"
|
|
64
|
+
complexity: 3
|
|
65
|
+
expectedOutput:
|
|
66
|
+
- "审查结论"
|
|
67
|
+
- "测试覆盖分析"
|
|
68
|
+
- "边界条件检查"
|
|
69
|
+
- "异常路径检查"
|
|
70
|
+
successCriteria:
|
|
71
|
+
- "测试覆盖分析准确"
|
|
72
|
+
- "遗漏测试识别有效"
|
|
73
|
+
qualityMetrics:
|
|
74
|
+
- "测试遗漏检出率 >= 85%"
|
|
75
|
+
- "覆盖率评估准确率 >= 90%"
|
|
76
|
+
maxDuration: 450
|
|
77
|
+
|
|
78
|
+
- name: "git-repo-review"
|
|
79
|
+
input: "review code https://github.com/{org}/{repo} --branch {branch}"
|
|
80
|
+
complexity: 5
|
|
81
|
+
expectedBehaviors:
|
|
82
|
+
- "自动克隆 git 仓库"
|
|
83
|
+
- "指定分支检出"
|
|
84
|
+
- "执行代码审查"
|
|
85
|
+
- "生成审查报告"
|
|
86
|
+
successCriteria:
|
|
87
|
+
- "git 克隆成功"
|
|
88
|
+
- "分支检出正确"
|
|
89
|
+
- "审查报告完整"
|
|
90
|
+
qualityMetrics:
|
|
91
|
+
- "仓库审查成功率 = 100%"
|
|
92
|
+
- "问题检出率 >= 80%"
|
|
93
|
+
maxDuration: 900
|
|
94
|
+
|
|
95
|
+
- name: "auto-review"
|
|
96
|
+
input: ""
|
|
97
|
+
complexity: 1
|
|
98
|
+
expectedBehaviors:
|
|
99
|
+
- "读取 state.json 确定当前阶段"
|
|
100
|
+
- "自动推断审查类型"
|
|
101
|
+
- "自动定位审查目标"
|
|
102
|
+
successCriteria:
|
|
103
|
+
- "自动推断正确"
|
|
104
|
+
- "审查目标定位准确"
|
|
105
|
+
qualityMetrics:
|
|
106
|
+
- "自动推断准确率 >= 95%"
|
|
107
|
+
maxDuration: 300
|
|
108
|
+
|
|
109
|
+
- name: "multi-system-review"
|
|
110
|
+
input: "review code ./src/ --subsystem=backend"
|
|
111
|
+
complexity: 5
|
|
112
|
+
expectedBehaviors:
|
|
113
|
+
- "识别多系统配置"
|
|
114
|
+
- "审查指定子系统"
|
|
115
|
+
successCriteria:
|
|
116
|
+
- "子系统识别正确"
|
|
117
|
+
- "审查范围准确"
|
|
118
|
+
qualityMetrics:
|
|
119
|
+
- "子系统审查准确率 = 100%"
|
|
120
|
+
maxDuration: 600
|
|
121
|
+
|
|
122
|
+
successCriteria:
|
|
123
|
+
passRate: 90
|
|
124
|
+
avgDuration: 400
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# AutoSpec Command Benchmark Template - Run
|
|
2
|
+
# 适用于: 测试 run 命令(完整流程)
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "command-run"
|
|
7
|
+
description: "Run 命令基准测试 - 启动完整流程"
|
|
8
|
+
|
|
9
|
+
type: command
|
|
10
|
+
target: run
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "waterfall-simple"
|
|
14
|
+
input: "实现 {feature-name} --workflow=waterfall"
|
|
15
|
+
complexity: 1
|
|
16
|
+
expectedStages:
|
|
17
|
+
- exploration
|
|
18
|
+
- design
|
|
19
|
+
- implement
|
|
20
|
+
- test
|
|
21
|
+
- deliver
|
|
22
|
+
expectedArtifacts:
|
|
23
|
+
- "specs/{feature}/requirement.md"
|
|
24
|
+
- "specs/{feature}/design.md"
|
|
25
|
+
- "src/ 源代码"
|
|
26
|
+
- "tests/ 测试代码"
|
|
27
|
+
- "release-notes.md"
|
|
28
|
+
successCriteria:
|
|
29
|
+
- "完整执行 5 个阶段"
|
|
30
|
+
- "每个阶段通过 Layer1+Layer2 验证"
|
|
31
|
+
- "产出物完整"
|
|
32
|
+
qualityMetrics:
|
|
33
|
+
- "阶段完成率 = 100%"
|
|
34
|
+
- "Layer1 通过率 >= 90%"
|
|
35
|
+
- "Layer2 通过率 >= 80%"
|
|
36
|
+
maxDuration: 3600
|
|
37
|
+
|
|
38
|
+
- name: "agile-iteration"
|
|
39
|
+
input: "实现 {feature-name} --workflow=agile"
|
|
40
|
+
complexity: 3
|
|
41
|
+
expectedStages:
|
|
42
|
+
- exploration
|
|
43
|
+
- story-iterations
|
|
44
|
+
- integration-test
|
|
45
|
+
- deliver
|
|
46
|
+
expectedArtifacts:
|
|
47
|
+
- "specs/{feature}/requirement.md"
|
|
48
|
+
- "specs/{feature}/stories.md"
|
|
49
|
+
- "src/ 源代码"
|
|
50
|
+
- "tests/ 测试代码"
|
|
51
|
+
successCriteria:
|
|
52
|
+
- "用户故事拆分合理"
|
|
53
|
+
- "集成测试通过"
|
|
54
|
+
qualityMetrics:
|
|
55
|
+
- "故事完成率 >= 90%"
|
|
56
|
+
- "集成测试通过率 = 100%"
|
|
57
|
+
maxDuration: 1800
|
|
58
|
+
|
|
59
|
+
- name: "experiment-mode"
|
|
60
|
+
input: "实现 {feature-name} --workflow=experiment"
|
|
61
|
+
complexity: 5
|
|
62
|
+
expectedStages:
|
|
63
|
+
- exploration
|
|
64
|
+
- design
|
|
65
|
+
- prototype-implement
|
|
66
|
+
- evaluation
|
|
67
|
+
- assumption-validation
|
|
68
|
+
expectedArtifacts:
|
|
69
|
+
- "specs/{feature}/assumptions.md"
|
|
70
|
+
- "specs/{feature}/evaluation-plan.md"
|
|
71
|
+
- "specs/{feature}/evaluation-report.md"
|
|
72
|
+
successCriteria:
|
|
73
|
+
- "假设定义清晰可验证"
|
|
74
|
+
- "效果评测自动执行"
|
|
75
|
+
qualityMetrics:
|
|
76
|
+
- "假设完整率 >= 90%"
|
|
77
|
+
- "效果达标率 >= 80%"
|
|
78
|
+
maxDuration: 2700
|
|
79
|
+
|
|
80
|
+
- name: "hotfix-mode"
|
|
81
|
+
input: "修复 {bug-description} --workflow=hotfix"
|
|
82
|
+
complexity: 1
|
|
83
|
+
expectedStages:
|
|
84
|
+
- diagnosis
|
|
85
|
+
- risk-assessment
|
|
86
|
+
- quick-fix
|
|
87
|
+
- quick-review
|
|
88
|
+
- quick-deploy
|
|
89
|
+
expectedArtifacts:
|
|
90
|
+
- "specs/hotfix-{id}/hotfix-issue.md"
|
|
91
|
+
- "risk-assessment.md"
|
|
92
|
+
- "变更代码"
|
|
93
|
+
- "回归测试"
|
|
94
|
+
successCriteria:
|
|
95
|
+
- "问题定位准确"
|
|
96
|
+
- "变更最小化"
|
|
97
|
+
- "回归测试通过"
|
|
98
|
+
qualityMetrics:
|
|
99
|
+
- "修复耗时 < 15 分钟"
|
|
100
|
+
maxDuration: 900
|
|
101
|
+
|
|
102
|
+
- name: "multi-system-waterfall"
|
|
103
|
+
input: "实现 {feature},包含后端/前端/移动端 --workflow=waterfall"
|
|
104
|
+
complexity: 5
|
|
105
|
+
expectedStages:
|
|
106
|
+
- exploration
|
|
107
|
+
- design
|
|
108
|
+
- implement
|
|
109
|
+
- test
|
|
110
|
+
- deliver
|
|
111
|
+
expectedArtifacts:
|
|
112
|
+
- "specs/{feature}/design/overview.md"
|
|
113
|
+
- "specs/{feature}/design/backend.md"
|
|
114
|
+
- "specs/{feature}/design/frontend.md"
|
|
115
|
+
- "contracts/api.yaml"
|
|
116
|
+
successCriteria:
|
|
117
|
+
- "识别所有子系统"
|
|
118
|
+
- "编码顺序正确"
|
|
119
|
+
- "集成测试通过"
|
|
120
|
+
qualityMetrics:
|
|
121
|
+
- "子系统识别率 = 100%"
|
|
122
|
+
- "依赖顺序正确率 = 100%"
|
|
123
|
+
maxDuration: 5400
|
|
124
|
+
|
|
125
|
+
successCriteria:
|
|
126
|
+
passRate: 85
|
|
127
|
+
avgDuration: 1800
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# AutoSpec Command Benchmark Template - Test
|
|
2
|
+
# 适用于: 测试 test 命令(验证+修复+评测)
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/,再按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "command-test"
|
|
7
|
+
description: "Test 命令基准测试 - 统一测试命令"
|
|
8
|
+
|
|
9
|
+
type: command
|
|
10
|
+
target: test
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "validation-only"
|
|
14
|
+
input: "/autospec:test"
|
|
15
|
+
complexity: 1
|
|
16
|
+
expectedBehaviors:
|
|
17
|
+
- "执行编译检查"
|
|
18
|
+
- "执行测试"
|
|
19
|
+
- "执行 Lint 检查"
|
|
20
|
+
- "执行类型检查"
|
|
21
|
+
expectedOutput:
|
|
22
|
+
- "验证报告"
|
|
23
|
+
- "各项验证结果"
|
|
24
|
+
successCriteria:
|
|
25
|
+
- "验证步骤完整"
|
|
26
|
+
- "结果报告清晰"
|
|
27
|
+
qualityMetrics:
|
|
28
|
+
- "验证执行率 = 100%"
|
|
29
|
+
- "报告完整率 >= 95%"
|
|
30
|
+
maxDuration: 300
|
|
31
|
+
|
|
32
|
+
- name: "validation-with-fix"
|
|
33
|
+
input: "/autospec:test --fix"
|
|
34
|
+
complexity: 3
|
|
35
|
+
expectedBehaviors:
|
|
36
|
+
- "执行 Layer1 验证"
|
|
37
|
+
- "问题分析"
|
|
38
|
+
- "修复分类"
|
|
39
|
+
- "执行修复"
|
|
40
|
+
- "效果验证"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "验证报告"
|
|
43
|
+
- "问题分析报告"
|
|
44
|
+
- "修复记录"
|
|
45
|
+
successCriteria:
|
|
46
|
+
- "问题分类准确"
|
|
47
|
+
- "修复有效"
|
|
48
|
+
- "退化检测正确"
|
|
49
|
+
qualityMetrics:
|
|
50
|
+
- "修复有效率 >= 85%"
|
|
51
|
+
- "退化检出率 = 100%"
|
|
52
|
+
maxDuration: 600
|
|
53
|
+
|
|
54
|
+
- name: "subsystem-test"
|
|
55
|
+
input: "/autospec:test --subsystem=backend --fix"
|
|
56
|
+
complexity: 3
|
|
57
|
+
expectedBehaviors:
|
|
58
|
+
- "读取 config.json 获取子系统配置"
|
|
59
|
+
- "只验证指定子系统"
|
|
60
|
+
- "执行子系统修复"
|
|
61
|
+
successCriteria:
|
|
62
|
+
- "子系统识别正确"
|
|
63
|
+
- "验证范围准确"
|
|
64
|
+
qualityMetrics:
|
|
65
|
+
- "子系统验证准确率 = 100%"
|
|
66
|
+
maxDuration: 450
|
|
67
|
+
|
|
68
|
+
- name: "ai-evaluation"
|
|
69
|
+
input: "/autospec:test --scope=evaluation"
|
|
70
|
+
complexity: 5
|
|
71
|
+
expectedBehaviors:
|
|
72
|
+
- "检测 AI/模型组件"
|
|
73
|
+
- "加载评测数据集"
|
|
74
|
+
- "执行效果评测"
|
|
75
|
+
- "生成评测报告"
|
|
76
|
+
expectedOutput:
|
|
77
|
+
- "评测报告"
|
|
78
|
+
- "指标结果"
|
|
79
|
+
- "Badcase 分析"
|
|
80
|
+
successCriteria:
|
|
81
|
+
- "评测方案执行完整"
|
|
82
|
+
- "指标计算准确"
|
|
83
|
+
qualityMetrics:
|
|
84
|
+
- "评测完整率 = 100%"
|
|
85
|
+
- "Badcase 检出率 >= 90%"
|
|
86
|
+
maxDuration: 900
|
|
87
|
+
|
|
88
|
+
- name: "git-repo-test"
|
|
89
|
+
input: "/autospec:test https://github.com/{org}/{repo} --branch main"
|
|
90
|
+
complexity: 5
|
|
91
|
+
expectedBehaviors:
|
|
92
|
+
- "克隆 git 仓库"
|
|
93
|
+
- "检出指定分支"
|
|
94
|
+
- "执行验证"
|
|
95
|
+
successCriteria:
|
|
96
|
+
- "git 克隆成功"
|
|
97
|
+
- "验证执行完整"
|
|
98
|
+
qualityMetrics:
|
|
99
|
+
- "仓库测试成功率 >= 95%"
|
|
100
|
+
maxDuration: 600
|
|
101
|
+
|
|
102
|
+
- name: "deep-analysis"
|
|
103
|
+
input: "/autospec:test --deep --fix"
|
|
104
|
+
complexity: 5
|
|
105
|
+
expectedBehaviors:
|
|
106
|
+
- "执行完整验证"
|
|
107
|
+
- "深度问题分析"
|
|
108
|
+
- "根因识别"
|
|
109
|
+
- "全面修复"
|
|
110
|
+
expectedOutput:
|
|
111
|
+
- "深度分析报告"
|
|
112
|
+
- "根因分析"
|
|
113
|
+
- "质量评分"
|
|
114
|
+
successCriteria:
|
|
115
|
+
- "根因识别准确"
|
|
116
|
+
- "质量提升明显"
|
|
117
|
+
qualityMetrics:
|
|
118
|
+
- "根因识别率 >= 90%"
|
|
119
|
+
- "质量提升 >= 10%"
|
|
120
|
+
maxDuration: 900
|
|
121
|
+
|
|
122
|
+
- name: "unit-test-scope"
|
|
123
|
+
input: "/autospec:test --scope=unit"
|
|
124
|
+
complexity: 1
|
|
125
|
+
expectedBehaviors:
|
|
126
|
+
- "只执行单元测试"
|
|
127
|
+
- "跳过集成测试"
|
|
128
|
+
successCriteria:
|
|
129
|
+
- "测试范围准确"
|
|
130
|
+
qualityMetrics:
|
|
131
|
+
- "单元测试覆盖率 >= 95%"
|
|
132
|
+
maxDuration: 180
|
|
133
|
+
|
|
134
|
+
- name: "integration-test-scope"
|
|
135
|
+
input: "/autospec:test --scope=integration --fix"
|
|
136
|
+
complexity: 3
|
|
137
|
+
expectedBehaviors:
|
|
138
|
+
- "执行集成测试"
|
|
139
|
+
- "识别集成问题"
|
|
140
|
+
- "修复集成问题"
|
|
141
|
+
successCriteria:
|
|
142
|
+
- "集成测试执行完整"
|
|
143
|
+
qualityMetrics:
|
|
144
|
+
- "集成测试覆盖率 = 100%"
|
|
145
|
+
maxDuration: 450
|
|
146
|
+
|
|
147
|
+
successCriteria:
|
|
148
|
+
passRate: 90
|
|
149
|
+
avgDuration: 450
|