@chongyan/autospec 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.en.md +447 -321
- package/README.md +418 -286
- package/knowledge/01-principles/00-principles-hierarchy.md +247 -0
- package/knowledge/01-principles/01-first-principles.md +241 -0
- package/knowledge/01-principles/02-strategic-principles.md +286 -0
- package/knowledge/01-principles/03-tactical-principles.md +385 -0
- package/knowledge/01-principles/04-operational-principles.md +275 -0
- package/knowledge/01-principles/05-domain-principles.md +539 -0
- package/knowledge/01-principles/06-methodology-principles.md +281 -0
- package/knowledge/01-principles/07-cognitive-principles.md +277 -0
- package/knowledge/01-principles/08-auto-fix-principles.md +320 -0
- package/knowledge/01-principles/09-constitution.md +220 -0
- package/knowledge/{principles/evolution.md → 01-principles/10-evolution-mechanism.md} +160 -14
- package/knowledge/01-principles/README.en.md +385 -0
- package/knowledge/01-principles/README.md +385 -0
- package/knowledge/{process/overview.md → 02-process/00-overview.md} +90 -5
- package/knowledge/02-process/README.en.md +143 -0
- package/knowledge/02-process/README.md +186 -0
- package/knowledge/{guides/support/pipeline-protocol.md → 03-guides/00-pipeline-protocol.md} +10 -10
- package/knowledge/{guides/support/team-orchestrator.md → 03-guides/01-team-orchestrator.md} +53 -8
- package/knowledge/{guides/stages/requirement-analyzer.md → 03-guides/02-analyze-requirement.md} +3 -3
- package/knowledge/{guides/stages/ai-effect-evaluator.md → 03-guides/08-evaluate-ai-effect.md} +14 -7
- package/knowledge/{guides/support/skill-distiller.md → 03-guides/19-distill-skill.md} +3 -3
- package/knowledge/{guides/support/skill-updater.md → 03-guides/20-update-skill.md} +1 -1
- package/knowledge/{guides/support/methodology-extractor.md → 03-guides/22-extract-methodology.md} +2 -2
- package/knowledge/{guides/support/complexity-assessor.md → 03-guides/24-assess-complexity.md} +6 -4
- package/knowledge/{guides/support/tech-stack-analyzer.md → 03-guides/26-analyze-tech-stack.md} +1 -1
- package/knowledge/{guides/domain-driven-design.md → 03-guides/42-apply-ddd.md} +1 -1
- package/knowledge/{process/ai-sdlc.md → 03-guides/43-run-ai-sdlc.md} +1 -1
- package/knowledge/{guides/knowledge-management.md → 03-guides/44-manage-knowledge.md} +4 -4
- package/knowledge/03-guides/README.en.md +212 -0
- package/knowledge/03-guides/README.md +212 -0
- package/knowledge/{checklists/requirement.md → 04-checklists/00-requirement.md} +1 -1
- package/knowledge/{checklists/design.md → 04-checklists/01-design.md} +1 -1
- package/knowledge/{checklists/code.md → 04-checklists/02-code.md} +16 -1
- package/knowledge/{checklists/release.md → 04-checklists/04-release.md} +1 -1
- package/knowledge/04-checklists/README.en.md +119 -0
- package/knowledge/04-checklists/README.md +123 -0
- package/knowledge/{config/validation-patterns.yaml → 05-config/00-validation-patterns.yaml} +1 -1
- package/knowledge/{config/team-tasks.yaml → 05-config/02-team-tasks.yaml} +2 -2
- package/knowledge/05-config/03-role-composition.yaml +346 -0
- package/knowledge/{config/skill-compositions.yaml → 05-config/05-skill-compositions.yaml} +24 -24
- package/knowledge/05-config/README.en.md +54 -0
- package/knowledge/05-config/README.md +132 -0
- package/knowledge/06-environment/00-template-registry.md +310 -0
- package/knowledge/06-environment/01-detection-patterns.yaml +1692 -0
- package/knowledge/{environment → 06-environment}/README.en.md +4 -0
- package/knowledge/{environment → 06-environment}/README.md +66 -25
- package/knowledge/{standards/coding-style.md → 07-standards/00-coding-style.md} +123 -4
- package/knowledge/{standards/code-review.md → 07-standards/01-code-review.md} +3 -3
- package/knowledge/{standards/data-consistency.md → 07-standards/02-data-consistency.md} +1 -1
- package/knowledge/{standards/document-versioning.md → 07-standards/03-document-versioning.md} +1 -1
- package/knowledge/{standards/risk-detection.md → 07-standards/04-risk-detection.md} +5 -5
- package/knowledge/07-standards/README.en.md +119 -0
- package/knowledge/07-standards/README.md +123 -0
- package/knowledge/08-organization/00-vision-mission.md +113 -0
- package/knowledge/{organization/ai-native-team.md → 08-organization/01-ai-native-culture.md} +1 -1
- package/knowledge/{organization/team-metrics.md → 08-organization/02-team-metrics.md} +1 -1
- package/knowledge/08-organization/03-committee-structure.md +54 -0
- package/knowledge/08-organization/04-governance-metrics.md +55 -0
- package/knowledge/08-organization/05-improvement-process.md +71 -0
- package/knowledge/08-organization/README.en.md +165 -0
- package/knowledge/08-organization/README.md +165 -0
- package/knowledge/09-templates/00-requirement-proposal.md +344 -0
- package/knowledge/09-templates/01-architecture-design.md +494 -0
- package/knowledge/09-templates/02-api-design.md +408 -0
- package/knowledge/09-templates/03-database-design.md +313 -0
- package/knowledge/09-templates/04-product-design.md +237 -0
- package/knowledge/09-templates/05-domain-business.md +388 -0
- package/knowledge/09-templates/06-test-design.md +268 -0
- package/knowledge/09-templates/07-evaluation-design.md +372 -0
- package/knowledge/09-templates/08-component-knowledge.md +272 -0
- package/knowledge/09-templates/09-best-practices.md +218 -0
- package/knowledge/{environment/middleware-knowledge.md → 09-templates/10-middleware-knowledge.md} +106 -1
- package/knowledge/09-templates/README.en.md +222 -0
- package/knowledge/09-templates/README.md +216 -0
- package/knowledge/README.en.md +372 -0
- package/knowledge/README.md +354 -99
- package/package.json +1 -1
- package/plugins/.claude-plugin/plugin.json +460 -81
- package/plugins/agents/roles/ceo.md +1 -1
- package/plugins/agents/roles/product-owner.md +1 -1
- package/plugins/agents/roles/tech-lead.md +1 -1
- package/plugins/agents/support/consistency-checker.md +36 -3
- package/plugins/agents/support/monitoring-agent.md +215 -0
- package/plugins/agents/support/safety-auditor.md +2 -2
- package/plugins/agents/support/stage-gate-evaluator.md +95 -11
- package/plugins/agents/support/test-coverage-reviewer.md +1 -1
- package/plugins/benchmarks/templates/README.md +165 -13
- package/plugins/benchmarks/templates/commands/apply-template.yaml +108 -0
- package/plugins/benchmarks/templates/commands/archive-template.yaml +65 -0
- package/plugins/benchmarks/templates/commands/env-export-template.yaml +64 -0
- package/plugins/benchmarks/templates/commands/env-sync-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/env-template-template.yaml +96 -0
- package/plugins/benchmarks/templates/commands/env-template.yaml +58 -0
- package/plugins/benchmarks/templates/commands/env-update-template.yaml +110 -0
- package/plugins/benchmarks/templates/commands/env-validate-template.yaml +95 -0
- package/plugins/benchmarks/templates/commands/field-evolve-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/project-evolve-template.yaml +104 -0
- package/plugins/benchmarks/templates/commands/propose-template.yaml +88 -0
- package/plugins/benchmarks/templates/commands/review-template.yaml +124 -0
- package/plugins/benchmarks/templates/commands/run-template.yaml +127 -0
- package/plugins/benchmarks/templates/commands/test-template.yaml +149 -0
- package/plugins/benchmarks/templates/pipeline/experiment-template.yaml +92 -0
- package/plugins/benchmarks/templates/pipeline/hotfix-template.yaml +81 -0
- package/plugins/benchmarks/templates/skills/agile-iteration-template.yaml +78 -0
- package/plugins/benchmarks/templates/skills/benchmark-executor-template.yaml +114 -0
- package/plugins/benchmarks/templates/skills/benchmark-generator-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/delivery-stage-template.yaml +130 -0
- package/plugins/benchmarks/templates/skills/design-stage-template.yaml +131 -0
- package/plugins/benchmarks/templates/skills/experiment-iteration-template.yaml +60 -0
- package/plugins/benchmarks/templates/skills/exploration-phase-template.yaml +114 -0
- package/plugins/benchmarks/templates/skills/field-evolve-analyzer-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/field-evolve-distiller-template.yaml +34 -0
- package/plugins/benchmarks/templates/skills/field-evolve-executor-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/field-evolve-fixer-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/field-evolve-learner-template.yaml +33 -0
- package/plugins/benchmarks/templates/skills/field-evolve-scanner-template.yaml +74 -0
- package/plugins/benchmarks/templates/skills/field-evolve-template.yaml +71 -0
- package/plugins/benchmarks/templates/skills/field-evolve-verifier-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/hotfix-iteration-template.yaml +54 -0
- package/plugins/benchmarks/templates/skills/implementation-stage-template.yaml +127 -0
- package/plugins/benchmarks/templates/skills/layer1-validation-template.yaml +121 -0
- package/plugins/benchmarks/templates/skills/project-evolve-analyzer-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/project-evolve-fixer-template.yaml +52 -0
- package/plugins/benchmarks/templates/skills/project-evolve-generator-template.yaml +34 -0
- package/plugins/benchmarks/templates/skills/project-evolve-learner-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/project-evolve-reviewer-template.yaml +50 -0
- package/plugins/benchmarks/templates/skills/project-evolve-scanner-template.yaml +75 -0
- package/plugins/benchmarks/templates/skills/project-evolve-template.yaml +72 -0
- package/plugins/benchmarks/templates/skills/project-evolve-verifier-template.yaml +51 -0
- package/plugins/benchmarks/templates/skills/skill-forge-template.yaml +117 -0
- package/plugins/benchmarks/templates/skills/startup-guard-template.yaml +103 -0
- package/plugins/benchmarks/templates/skills/testing-stage-template.yaml +146 -0
- package/plugins/benchmarks/templates/skills/waterfall-iteration-template.yaml +55 -0
- package/plugins/commands/README.en.md +2 -2
- package/plugins/commands/README.md +2 -2
- package/plugins/commands/apply.md +102 -16
- package/plugins/commands/archive.md +60 -4
- package/plugins/commands/env-sync.md +1047 -406
- package/plugins/commands/env-template.md +11 -135
- package/plugins/commands/env-update.md +1 -1
- package/plugins/commands/env-validate.md +3 -3
- package/plugins/commands/explore.md +118 -1
- package/plugins/commands/field-evolve.md +51 -175
- package/plugins/commands/project-evolve.md +167 -68
- package/plugins/commands/propose.md +97 -6
- package/plugins/commands/review.md +5 -5
- package/plugins/commands/run.md +841 -13
- package/plugins/commands/status.md +138 -17
- package/plugins/commands/test.md +389 -0
- package/plugins/hooks/constitution-guard.js +1 -1
- package/plugins/hooks/environment-autocommit.js +366 -24
- package/plugins/hooks/environment-manager.js +3 -2
- package/plugins/hooks/execution-tracker.js +109 -4
- package/plugins/hooks/layer1-validator.js +117 -1
- package/plugins/hooks/lib/auto-fix-loop.js +605 -0
- package/plugins/hooks/lib/environment-config-loader.js +11 -7
- package/plugins/hooks/lib/hook-state-manager.js +98 -0
- package/plugins/hooks/lib/memory-extractor.js +27 -5
- package/plugins/hooks/lib/memory-manager.js +1 -1
- package/plugins/hooks/lib/test-auto-fix.test.js +194 -0
- package/plugins/hooks/monitoring-trigger.js +467 -0
- package/plugins/skills/README.en.md +15 -3
- package/plugins/skills/README.md +21 -11
- package/plugins/skills/agile-iteration/SKILL.md +187 -0
- package/plugins/skills/delivery-stage/SKILL.md +133 -12
- package/plugins/skills/design-stage/SKILL.md +103 -12
- package/plugins/skills/experiment-evaluator/SKILL.md +271 -0
- package/plugins/skills/experiment-iteration/SKILL.md +154 -0
- package/plugins/skills/exploration-phase/SKILL.md +93 -10
- package/plugins/skills/field-evolve-analyzer/SKILL.md +65 -0
- package/plugins/skills/field-evolve-distiller/SKILL.md +66 -0
- package/plugins/skills/field-evolve-executor/SKILL.md +94 -0
- package/plugins/skills/field-evolve-executor/executor.js +342 -0
- package/plugins/skills/field-evolve-fixer/SKILL.md +69 -0
- package/plugins/skills/field-evolve-learner/SKILL.md +65 -0
- package/plugins/skills/field-evolve-scanner/SKILL.md +87 -0
- package/plugins/skills/field-evolve-scanner/scripts/fallback-scanner.js +288 -0
- package/plugins/skills/field-evolve-verifier/SKILL.md +64 -0
- package/plugins/skills/hotfix-iteration/SKILL.md +279 -0
- package/plugins/skills/implementation-stage/SKILL.md +156 -15
- package/plugins/skills/layer1-validation/SKILL.md +1 -1
- package/plugins/skills/pending-dashboard/SKILL.md +9 -8
- package/plugins/skills/project-evolve-analyzer/SKILL.md +95 -0
- package/plugins/skills/project-evolve-fixer/SKILL.md +99 -0
- package/plugins/skills/project-evolve-generator/SKILL.md +149 -0
- package/plugins/skills/project-evolve-learner/SKILL.md +103 -0
- package/plugins/skills/project-evolve-reviewer/SKILL.md +104 -0
- package/plugins/skills/project-evolve-scanner/SKILL.md +95 -0
- package/plugins/skills/project-evolve-scanner/scripts/dependency-reuse-checker.js +395 -0
- package/plugins/skills/project-evolve-scanner/scripts/subsystem-coverage.js +315 -0
- package/plugins/skills/project-evolve-verifier/SKILL.md +105 -0
- package/plugins/skills/requirement-stage/SKILL.md +47 -13
- package/plugins/skills/skill-forge/SKILL.md +2 -2
- package/plugins/skills/testing-stage/SKILL.md +583 -8
- package/plugins/skills/waterfall-iteration/SKILL.md +115 -0
- package/scripts/cli/index.js +1 -1
- package/scripts/cli/init.js +30 -4
- package/scripts/cli/list.js +3 -2
- package/scripts/config/commands.config.js +8 -8
- package/scripts/config/hooks.config.js +1 -1
- package/scripts/install/constants.js +204 -165
- package/scripts/state.js +210 -1
- package/knowledge/config/README.en.md +0 -44
- package/knowledge/config/README.md +0 -44
- package/knowledge/config/role-composition.yaml +0 -98
- package/knowledge/config/team-triggers.yaml +0 -198
- package/knowledge/domain/README.md +0 -115
- package/knowledge/domain/flows/README.md +0 -194
- package/knowledge/domain/glossary.md +0 -143
- package/knowledge/domain/rules.md +0 -138
- package/knowledge/environment/component-knowledge.md +0 -316
- package/knowledge/environment/detection-patterns.yaml +0 -502
- package/knowledge/environment/template-registry.md +0 -321
- package/knowledge/guides/requirement-engineering.md +0 -329
- package/knowledge/guides/system-design.md +0 -352
- package/knowledge/principles/constitution.md +0 -134
- package/knowledge/principles/core-principles.md +0 -368
- package/knowledge/principles/design-philosophy.md +0 -877
- package/knowledge/process/README.en.md +0 -38
- package/knowledge/process/README.md +0 -48
- package/knowledge/templates/ai-evaluation.md +0 -150
- package/knowledge/templates/api-design.md +0 -117
- package/knowledge/templates/database-design.md +0 -132
- package/knowledge/templates/domain-driven-design.md +0 -321
- package/knowledge/templates/product-proposal.md +0 -201
- package/knowledge/templates/system-design.md +0 -227
- package/knowledge/templates/task-breakdown.md +0 -107
- package/knowledge/templates/test-case.md +0 -170
- package/plugins/commands/validate.md +0 -108
- package/plugins/skills/benchmark-executor/README.md +0 -93
- package/plugins/skills/evolution-process/SKILL.md +0 -291
- package/plugins/skills/project-evolution/SKILL.md +0 -847
- package/scripts/evolution/evolution-router.js +0 -273
- package/scripts/evolution/evolution-signal-collector.js +0 -307
- package/scripts/evolution/knowledge-loader.js +0 -346
- package/scripts/evolution/marketplace.js +0 -317
- package/scripts/evolution/version-manager.js +0 -371
- /package/knowledge/{process → 02-process}/01-requirement.md +0 -0
- /package/knowledge/{process → 02-process}/02-design.md +0 -0
- /package/knowledge/{process → 02-process}/03-implementation.md +0 -0
- /package/knowledge/{process → 02-process}/04-review.md +0 -0
- /package/knowledge/{process → 02-process}/05-testing.md +0 -0
- /package/knowledge/{process → 02-process}/06-delivery.md +0 -0
- /package/knowledge/{guides/stages/design-planner.md → 03-guides/03-design-solution.md} +0 -0
- /package/knowledge/{guides/stages/code-implementer.md → 03-guides/04-implement-code.md} +0 -0
- /package/knowledge/{guides/stages/test-planner.md → 03-guides/05-plan-testing.md} +0 -0
- /package/knowledge/{guides/stages/test-generator.md → 03-guides/06-generate-tests.md} +0 -0
- /package/knowledge/{guides/stages/release-checker.md → 03-guides/07-check-release.md} +0 -0
- /package/knowledge/{guides/stages/requirement-reviewer.md → 03-guides/09-review-requirement.md} +0 -0
- /package/knowledge/{guides/stages/design-reviewer.md → 03-guides/10-review-design.md} +0 -0
- /package/knowledge/{guides/stages/code-reviewer.md → 03-guides/11-review-code.md} +0 -0
- /package/knowledge/{guides/stages/test-reviewer.md → 03-guides/12-review-testing.md} +0 -0
- /package/knowledge/{guides/stages/security-reviewer.md → 03-guides/13-audit-security.md} +0 -0
- /package/knowledge/{guides/stages/consistency-checker.md → 03-guides/14-check-consistency.md} +0 -0
- /package/knowledge/{guides/stages/unit-test-runner.md → 03-guides/15-run-unit-tests.md} +0 -0
- /package/knowledge/{guides/stages/integration-test-runner.md → 03-guides/16-run-integration-tests.md} +0 -0
- /package/knowledge/{guides/stages/test-context-analyzer.md → 03-guides/17-analyze-test-context.md} +0 -0
- /package/knowledge/{guides/support/practice-logger.md → 03-guides/18-log-practice.md} +0 -0
- /package/knowledge/{guides/support/skill-validator.md → 03-guides/21-validate-skill.md} +0 -0
- /package/knowledge/{guides/support/scope-inference.md → 03-guides/23-infer-scope.md} +0 -0
- /package/knowledge/{guides/support/component-discovery.md → 03-guides/25-discover-component.md} +0 -0
- /package/knowledge/{guides/support/environment-scanner.md → 03-guides/27-scan-environment.md} +0 -0
- /package/knowledge/{guides/support/environment-validator.md → 03-guides/28-validate-environment.md} +0 -0
- /package/knowledge/{guides/support/knowledge-generator.md → 03-guides/29-generate-knowledge.md} +0 -0
- /package/knowledge/{guides/support/ai-capability-analyzer.md → 03-guides/30-analyze-ai-capability.md} +0 -0
- /package/knowledge/{guides/support/ai-component-analyzer.md → 03-guides/31-analyze-ai-component.md} +0 -0
- /package/knowledge/{guides/support/ai-agent-analyzer.md → 03-guides/32-analyze-ai-agent.md} +0 -0
- /package/knowledge/{guides/support/ai-rag-analyzer.md → 03-guides/33-analyze-ai-rag.md} +0 -0
- /package/knowledge/{guides/support/ai-task-assessor.md → 03-guides/34-assess-ai-task.md} +0 -0
- /package/knowledge/{guides/support/ai-pipeline-evaluator.md → 03-guides/35-evaluate-ai-pipeline.md} +0 -0
- /package/knowledge/{guides/support/ai-artifact-evaluator.md → 03-guides/36-evaluate-ai-artifact.md} +0 -0
- /package/knowledge/{guides/support/ai-evaluation-planner.md → 03-guides/37-plan-ai-evaluation.md} +0 -0
- /package/knowledge/{guides/support/ai-path-evaluator.md → 03-guides/38-evaluate-ai-path.md} +0 -0
- /package/knowledge/{guides/support/ai-data-validator.md → 03-guides/39-validate-ai-data.md} +0 -0
- /package/knowledge/{guides/support/ai-anomaly-analyzer.md → 03-guides/40-detect-ai-anomaly.md} +0 -0
- /package/knowledge/{guides/support/ai-test-diagnostics.md → 03-guides/41-diagnose-ai-test.md} +0 -0
- /package/knowledge/{guides/support/test-runner.md → 03-guides/45-test-runner.md} +0 -0
- /package/knowledge/{checklists/test.md → 04-checklists/03-test.md} +0 -0
- /package/knowledge/{config/team-stage.yaml → 05-config/01-team-stage.yaml} +0 -0
- /package/knowledge/{config/role-extensions.yaml → 05-config/04-role-extensions.yaml} +0 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Implementation-Stage
|
|
2
|
+
# 适用于: 测试 implementation-stage skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-implementation-stage"
|
|
7
|
+
description: "Implementation-Stage Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: implementation-stage
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "simple-implementation"
|
|
14
|
+
input:
|
|
15
|
+
context: "实现 {feature-name} API"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取设计文档"
|
|
19
|
+
- "实现 API 端点"
|
|
20
|
+
- "实现业务逻辑"
|
|
21
|
+
- "编写单元测试"
|
|
22
|
+
- "执行 Layer1 验证"
|
|
23
|
+
expectedOutput:
|
|
24
|
+
- "src/{feature}/*.js"
|
|
25
|
+
- "tests/{feature}/*.test.js"
|
|
26
|
+
- "Layer1 验证报告"
|
|
27
|
+
successCriteria:
|
|
28
|
+
- "API 实现符合设计文档"
|
|
29
|
+
- "单元测试覆盖核心逻辑"
|
|
30
|
+
- "Layer1 验证通过"
|
|
31
|
+
qualityMetrics:
|
|
32
|
+
- "测试覆盖率 >= 70%"
|
|
33
|
+
- "代码质量评分 >= 80%"
|
|
34
|
+
maxDuration: 600
|
|
35
|
+
|
|
36
|
+
- name: "multi-system-implementation"
|
|
37
|
+
input:
|
|
38
|
+
context: "实现 {feature},包含后端和前端"
|
|
39
|
+
complexity: 5
|
|
40
|
+
expectedBehaviors:
|
|
41
|
+
- "读取多系统设计文档"
|
|
42
|
+
- "先实现后端 API"
|
|
43
|
+
- "执行后端 Layer1 验证"
|
|
44
|
+
- "再实现前端页面"
|
|
45
|
+
- "执行集成验证"
|
|
46
|
+
expectedOutput:
|
|
47
|
+
- "backend/src/{feature}/*.js"
|
|
48
|
+
- "frontend/src/pages/{feature}/*.jsx"
|
|
49
|
+
- "集成测试报告"
|
|
50
|
+
successCriteria:
|
|
51
|
+
- "依赖顺序正确"
|
|
52
|
+
- "集成测试通过"
|
|
53
|
+
qualityMetrics:
|
|
54
|
+
- "后端测试覆盖率 >= 70%"
|
|
55
|
+
- "集成测试通过率 >= 90%"
|
|
56
|
+
maxDuration: 1200
|
|
57
|
+
|
|
58
|
+
- name: "brownfield-implementation"
|
|
59
|
+
input:
|
|
60
|
+
context: "在现有项目中添加 {feature} 功能"
|
|
61
|
+
complexity: 3
|
|
62
|
+
expectedBehaviors:
|
|
63
|
+
- "执行 Brownfield 扫描"
|
|
64
|
+
- "识别现有代码结构"
|
|
65
|
+
- "复用现有依赖包"
|
|
66
|
+
- "遵循现有代码风格"
|
|
67
|
+
expectedOutput:
|
|
68
|
+
- "现有代码分析报告"
|
|
69
|
+
- "src/{feature}/*.js"
|
|
70
|
+
- "依赖复用检查报告"
|
|
71
|
+
successCriteria:
|
|
72
|
+
- "正确识别现有代码结构"
|
|
73
|
+
- "遵循现有代码风格"
|
|
74
|
+
- "与现有代码无缝集成"
|
|
75
|
+
qualityMetrics:
|
|
76
|
+
- "Brownfield 扫描执行率 = 100%"
|
|
77
|
+
- "代码风格符合率 >= 90%"
|
|
78
|
+
maxDuration: 900
|
|
79
|
+
|
|
80
|
+
- name: "ai-implementation"
|
|
81
|
+
input:
|
|
82
|
+
context: "实现 LLM 智能问答功能"
|
|
83
|
+
complexity: 5
|
|
84
|
+
expectedBehaviors:
|
|
85
|
+
- "实现模型加载逻辑"
|
|
86
|
+
- "实现推理接口"
|
|
87
|
+
- "实现效果评测脚本"
|
|
88
|
+
- "执行效果评测验证"
|
|
89
|
+
expectedOutput:
|
|
90
|
+
- "ai/model_loader.py"
|
|
91
|
+
- "ai/inference.py"
|
|
92
|
+
- "ai/evaluation.py"
|
|
93
|
+
- "效果评测报告"
|
|
94
|
+
successCriteria:
|
|
95
|
+
- "模型加载正确"
|
|
96
|
+
- "推理接口响应时间 < 2s"
|
|
97
|
+
- "评测指标达标"
|
|
98
|
+
qualityMetrics:
|
|
99
|
+
- "AI 组件完整率 >= 90%"
|
|
100
|
+
- "响应时间达标率 = 100%"
|
|
101
|
+
maxDuration: 900
|
|
102
|
+
|
|
103
|
+
- name: "db-migration-implementation"
|
|
104
|
+
input:
|
|
105
|
+
context: "实现数据库迁移,添加 {field} 字段"
|
|
106
|
+
complexity: 3
|
|
107
|
+
expectedBehaviors:
|
|
108
|
+
- "读取数据库设计文档"
|
|
109
|
+
- "编写迁移脚本(向上/向下)"
|
|
110
|
+
- "验证迁移安全性"
|
|
111
|
+
- "执行迁移测试"
|
|
112
|
+
expectedOutput:
|
|
113
|
+
- "migrations/add_{field}.js"
|
|
114
|
+
- "migrations/add_{field}.down.js"
|
|
115
|
+
- "迁移验证报告"
|
|
116
|
+
successCriteria:
|
|
117
|
+
- "迁移脚本完整"
|
|
118
|
+
- "迁移安全性验证通过"
|
|
119
|
+
- "数据完整性检查通过"
|
|
120
|
+
qualityMetrics:
|
|
121
|
+
- "迁移安全性 = 100%"
|
|
122
|
+
- "数据完整性 = 100%"
|
|
123
|
+
maxDuration: 450
|
|
124
|
+
|
|
125
|
+
successCriteria:
|
|
126
|
+
passRate: 85
|
|
127
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Layer1-Validation
|
|
2
|
+
# 适用于: 测试 layer1-validation skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-layer1-validation"
|
|
7
|
+
description: "Layer1-Validation Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: layer1-validation
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "single-system-validation"
|
|
14
|
+
input:
|
|
15
|
+
context: "对 Node.js 项目进行 Layer1 验证"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "识别项目类型"
|
|
19
|
+
- "执行编译检查"
|
|
20
|
+
- "执行测试"
|
|
21
|
+
- "执行 Lint 检查"
|
|
22
|
+
- "执行类型检查"
|
|
23
|
+
expectedOutput:
|
|
24
|
+
- "验证报告表格"
|
|
25
|
+
- "各项检查状态(PASS/FAIL/BLOCKED)"
|
|
26
|
+
- "日志摘要"
|
|
27
|
+
successCriteria:
|
|
28
|
+
- "验证步骤完整执行"
|
|
29
|
+
- "结果真实(有执行日志)"
|
|
30
|
+
- "状态标注正确"
|
|
31
|
+
qualityMetrics:
|
|
32
|
+
- "验证执行率 = 100%"
|
|
33
|
+
- "结果准确率 = 100%"
|
|
34
|
+
maxDuration: 300
|
|
35
|
+
|
|
36
|
+
- name: "multi-system-validation"
|
|
37
|
+
input:
|
|
38
|
+
context: "对多系统项目(backend/frontend)进行 Layer1 验证"
|
|
39
|
+
complexity: 3
|
|
40
|
+
expectedBehaviors:
|
|
41
|
+
- "读取 config.json 获取 subsystems 配置"
|
|
42
|
+
- "按依赖顺序验证"
|
|
43
|
+
- "对每个子系统执行 build/test/lint/typeCheck"
|
|
44
|
+
- "汇总验证结果"
|
|
45
|
+
expectedOutput:
|
|
46
|
+
- "多系统验证报告"
|
|
47
|
+
- "各子系统验证状态"
|
|
48
|
+
- "整体验证结论"
|
|
49
|
+
successCriteria:
|
|
50
|
+
- "子系统识别正确"
|
|
51
|
+
- "验证顺序正确"
|
|
52
|
+
- "所有子系统验证完成"
|
|
53
|
+
qualityMetrics:
|
|
54
|
+
- "子系统识别率 = 100%"
|
|
55
|
+
- "验证顺序正确率 = 100%"
|
|
56
|
+
maxDuration: 600
|
|
57
|
+
|
|
58
|
+
- name: "ai-subsystem-validation"
|
|
59
|
+
input:
|
|
60
|
+
context: "对 AI 子系统进行 Layer1 验证"
|
|
61
|
+
complexity: 3
|
|
62
|
+
expectedBehaviors:
|
|
63
|
+
- "识别 AI 子系统类型"
|
|
64
|
+
- "执行模型加载验证"
|
|
65
|
+
- "执行推理验证"
|
|
66
|
+
- "执行评测脚本验证"
|
|
67
|
+
expectedOutput:
|
|
68
|
+
- "AI 子系统验证报告"
|
|
69
|
+
- "模型加载状态"
|
|
70
|
+
- "推理验证结果"
|
|
71
|
+
- "评测脚本执行结果"
|
|
72
|
+
successCriteria:
|
|
73
|
+
- "AI 组件识别正确"
|
|
74
|
+
- "评测脚本执行完整"
|
|
75
|
+
qualityMetrics:
|
|
76
|
+
- "AI 组件识别率 = 100%"
|
|
77
|
+
- "评测执行率 = 100%"
|
|
78
|
+
maxDuration: 600
|
|
79
|
+
|
|
80
|
+
- name: "data-subsystem-validation"
|
|
81
|
+
input:
|
|
82
|
+
context: "对数据子系统进行 Layer1 验证"
|
|
83
|
+
complexity: 3
|
|
84
|
+
expectedBehaviors:
|
|
85
|
+
- "识别数据子系统类型"
|
|
86
|
+
- "执行数据质量检查"
|
|
87
|
+
- "执行 ETL 流程验证"
|
|
88
|
+
expectedOutput:
|
|
89
|
+
- "数据子系统验证报告"
|
|
90
|
+
- "数据质量检查结果"
|
|
91
|
+
- "ETL 流程验证结果"
|
|
92
|
+
successCriteria:
|
|
93
|
+
- "数据组件识别正确"
|
|
94
|
+
- "数据质量检查完整"
|
|
95
|
+
qualityMetrics:
|
|
96
|
+
- "数据组件识别率 = 100%"
|
|
97
|
+
- "数据质量检查率 = 100%"
|
|
98
|
+
maxDuration: 450
|
|
99
|
+
|
|
100
|
+
- name: "validation-failure"
|
|
101
|
+
input:
|
|
102
|
+
context: "Layer1 验证失败时的正确处理"
|
|
103
|
+
complexity: 1
|
|
104
|
+
expectedBehaviors:
|
|
105
|
+
- "执行验证"
|
|
106
|
+
- "检测失败项"
|
|
107
|
+
- "状态标注为 FAIL"
|
|
108
|
+
- "记录失败日志"
|
|
109
|
+
- "不进入 Layer2 审查"
|
|
110
|
+
successCriteria:
|
|
111
|
+
- "失败正确识别"
|
|
112
|
+
- "状态标注正确"
|
|
113
|
+
- "正确阻止进入 Layer2"
|
|
114
|
+
qualityMetrics:
|
|
115
|
+
- "失败检出率 = 100%"
|
|
116
|
+
- "状态标注准确率 = 100%"
|
|
117
|
+
maxDuration: 180
|
|
118
|
+
|
|
119
|
+
successCriteria:
|
|
120
|
+
passRate: 90
|
|
121
|
+
avgFieldCompletion: 95
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Project-Evolve-Analyzer
|
|
2
|
+
# 适用于: 测试 project-evolve-analyzer skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-project-evolve-analyzer"
|
|
7
|
+
description: "Project-Evolve-Analyzer Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: project-evolve-analyzer
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "problem-analysis"
|
|
14
|
+
input:
|
|
15
|
+
context: "分析项目问题"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取扫描结果"
|
|
19
|
+
- "识别问题模式"
|
|
20
|
+
- "分析根因"
|
|
21
|
+
- "评估影响范围"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "问题分析报告"
|
|
24
|
+
- "根因分析"
|
|
25
|
+
- "影响评估"
|
|
26
|
+
successCriteria:
|
|
27
|
+
- "问题模式识别准确"
|
|
28
|
+
- "根因分析深入"
|
|
29
|
+
qualityMetrics:
|
|
30
|
+
- "根因识别率 >= 85%"
|
|
31
|
+
maxDuration: 300
|
|
32
|
+
|
|
33
|
+
- name: "priority-calculation"
|
|
34
|
+
input:
|
|
35
|
+
context: "计算问题修复优先级"
|
|
36
|
+
complexity: 1
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "评估影响程度"
|
|
39
|
+
- "评估修复难度"
|
|
40
|
+
- "计算优先级分数"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "优先级排序列表"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "优先级计算合理"
|
|
45
|
+
qualityMetrics:
|
|
46
|
+
- "优先级合理性 >= 90%"
|
|
47
|
+
maxDuration: 120
|
|
48
|
+
|
|
49
|
+
successCriteria:
|
|
50
|
+
passRate: 85
|
|
51
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Project-Evolve-Fixer
|
|
2
|
+
# 适用于: 测试 project-evolve-fixer skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-project-evolve-fixer"
|
|
7
|
+
description: "Project-Evolve-Fixer Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: project-evolve-fixer
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "auto-fix"
|
|
14
|
+
input:
|
|
15
|
+
context: "自动修复检测到的问题"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取问题清单"
|
|
19
|
+
- "分类可自动修复问题"
|
|
20
|
+
- "执行修复"
|
|
21
|
+
- "验证修复效果"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "修复记录"
|
|
24
|
+
- "验证报告"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "修复有效"
|
|
27
|
+
- "无退化发生"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "修复有效率 >= 85%"
|
|
30
|
+
- "退化检出率 = 100%"
|
|
31
|
+
maxDuration: 600
|
|
32
|
+
|
|
33
|
+
- name: "manual-review-required"
|
|
34
|
+
input:
|
|
35
|
+
context: "处理需要人工审查的问题"
|
|
36
|
+
complexity: 1
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "识别需人工审查问题"
|
|
39
|
+
- "生成审查建议"
|
|
40
|
+
- "不自动执行"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "审查建议清单"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "分类正确"
|
|
45
|
+
- "不自动执行"
|
|
46
|
+
qualityMetrics:
|
|
47
|
+
- "分类准确率 = 100%"
|
|
48
|
+
maxDuration: 120
|
|
49
|
+
|
|
50
|
+
successCriteria:
|
|
51
|
+
passRate: 85
|
|
52
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Project-Evolve-Generator
|
|
2
|
+
# 适用于: 测试 project-evolve-generator skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-project-evolve-generator"
|
|
7
|
+
description: "Project-Evolve-Generator Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: project-evolve-generator
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "generate-benchmark"
|
|
14
|
+
input:
|
|
15
|
+
context: "生成项目 benchmark"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "检测项目类型"
|
|
19
|
+
- "生成基础 benchmark"
|
|
20
|
+
- "生成 pipeline benchmark"
|
|
21
|
+
expectedOutput:
|
|
22
|
+
- "project-generated.yaml"
|
|
23
|
+
- "generate-report.md"
|
|
24
|
+
successCriteria:
|
|
25
|
+
- "项目类型识别正确"
|
|
26
|
+
- "benchmark 结构完整"
|
|
27
|
+
qualityMetrics:
|
|
28
|
+
- "类型识别率 = 100%"
|
|
29
|
+
- "benchmark 完整率 >= 90%"
|
|
30
|
+
maxDuration: 300
|
|
31
|
+
|
|
32
|
+
successCriteria:
|
|
33
|
+
passRate: 85
|
|
34
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Project-Evolve-Learner
|
|
2
|
+
# 适用于: 测试 project-evolve-learner skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-project-evolve-learner"
|
|
7
|
+
description: "Project-Evolve-Learner Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: project-evolve-learner
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "pattern-learning"
|
|
14
|
+
input:
|
|
15
|
+
context: "从实践日志中学习模式"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取 practice-log"
|
|
19
|
+
- "识别重复模式"
|
|
20
|
+
- "提取通用解决方案"
|
|
21
|
+
expectedOutput:
|
|
22
|
+
- "模式识别报告"
|
|
23
|
+
- "通用解决方案"
|
|
24
|
+
successCriteria:
|
|
25
|
+
- "模式识别准确"
|
|
26
|
+
- "解决方案合理"
|
|
27
|
+
qualityMetrics:
|
|
28
|
+
- "模式识别率 >= 80%"
|
|
29
|
+
maxDuration: 300
|
|
30
|
+
|
|
31
|
+
- name: "knowledge-update"
|
|
32
|
+
input:
|
|
33
|
+
context: "更新项目知识"
|
|
34
|
+
complexity: 3
|
|
35
|
+
expectedBehaviors:
|
|
36
|
+
- "分析新发现"
|
|
37
|
+
- "更新知识文件"
|
|
38
|
+
- "验证更新效果"
|
|
39
|
+
expectedOutput:
|
|
40
|
+
- "更新后的知识文件"
|
|
41
|
+
- "更新报告"
|
|
42
|
+
successCriteria:
|
|
43
|
+
- "知识更新正确"
|
|
44
|
+
qualityMetrics:
|
|
45
|
+
- "更新准确率 >= 90%"
|
|
46
|
+
maxDuration: 300
|
|
47
|
+
|
|
48
|
+
successCriteria:
|
|
49
|
+
passRate: 80
|
|
50
|
+
avgFieldCompletion: 85
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Project-Evolve-Reviewer
|
|
2
|
+
# 适用于: 测试 project-evolve-reviewer skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-project-evolve-reviewer"
|
|
7
|
+
description: "Project-Evolve-Reviewer Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: project-evolve-reviewer
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "code-review"
|
|
14
|
+
input:
|
|
15
|
+
context: "审查代码变更"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "读取变更内容"
|
|
19
|
+
- "执行代码审查"
|
|
20
|
+
- "识别问题"
|
|
21
|
+
- "生成审查报告"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "审查报告"
|
|
24
|
+
- "问题清单"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "审查维度完整"
|
|
27
|
+
- "问题识别准确"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "问题检出率 >= 85%"
|
|
30
|
+
maxDuration: 300
|
|
31
|
+
|
|
32
|
+
- name: "design-review"
|
|
33
|
+
input:
|
|
34
|
+
context: "审查设计文档"
|
|
35
|
+
complexity: 3
|
|
36
|
+
expectedBehaviors:
|
|
37
|
+
- "读取设计文档"
|
|
38
|
+
- "执行设计审查"
|
|
39
|
+
- "检查一致性"
|
|
40
|
+
expectedOutput:
|
|
41
|
+
- "设计审查报告"
|
|
42
|
+
successCriteria:
|
|
43
|
+
- "设计审查完整"
|
|
44
|
+
qualityMetrics:
|
|
45
|
+
- "一致性检查执行率 = 100%"
|
|
46
|
+
maxDuration: 300
|
|
47
|
+
|
|
48
|
+
successCriteria:
|
|
49
|
+
passRate: 85
|
|
50
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Project-Evolve-Scanner
|
|
2
|
+
# 适用于: 测试 project-evolve-scanner skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-project-evolve-scanner"
|
|
7
|
+
description: "Project-Evolve-Scanner Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: project-evolve-scanner
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "nodejs-project-scan"
|
|
14
|
+
input:
|
|
15
|
+
context: "扫描一个 Node.js + React 项目"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "检测 package.json"
|
|
19
|
+
- "识别技术栈"
|
|
20
|
+
- "分析目录结构"
|
|
21
|
+
- "发现评测点"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "项目特征报告"
|
|
24
|
+
- "技术栈列表"
|
|
25
|
+
- "评测点列表"
|
|
26
|
+
successCriteria:
|
|
27
|
+
- "技术栈检测正确"
|
|
28
|
+
- "评测点发现完整"
|
|
29
|
+
qualityMetrics:
|
|
30
|
+
- "技术栈识别准确率 >= 95%"
|
|
31
|
+
- "评测点发现率 >= 80%"
|
|
32
|
+
maxDuration: 300
|
|
33
|
+
|
|
34
|
+
- name: "java-project-scan"
|
|
35
|
+
input:
|
|
36
|
+
context: "扫描一个 Java Spring Boot 项目"
|
|
37
|
+
complexity: 3
|
|
38
|
+
expectedBehaviors:
|
|
39
|
+
- "检测 pom.xml 或 build.gradle"
|
|
40
|
+
- "识别技术栈"
|
|
41
|
+
- "分析依赖版本"
|
|
42
|
+
expectedOutput:
|
|
43
|
+
- "项目特征报告"
|
|
44
|
+
- "依赖分析报告"
|
|
45
|
+
successCriteria:
|
|
46
|
+
- "技术栈检测正确"
|
|
47
|
+
- "依赖分析完整"
|
|
48
|
+
qualityMetrics:
|
|
49
|
+
- "技术栈识别准确率 >= 95%"
|
|
50
|
+
- "依赖分析完整率 >= 90%"
|
|
51
|
+
maxDuration: 450
|
|
52
|
+
|
|
53
|
+
- name: "multi-system-scan"
|
|
54
|
+
input:
|
|
55
|
+
context: "扫描包含前后端的多子系统项目"
|
|
56
|
+
complexity: 5
|
|
57
|
+
expectedBehaviors:
|
|
58
|
+
- "检测多个子系统"
|
|
59
|
+
- "识别各子系统技术栈"
|
|
60
|
+
- "分析子系统依赖关系"
|
|
61
|
+
- "生成记忆结构"
|
|
62
|
+
expectedOutput:
|
|
63
|
+
- "多系统特征报告"
|
|
64
|
+
- "系统间依赖关系图"
|
|
65
|
+
successCriteria:
|
|
66
|
+
- "子系统识别完整"
|
|
67
|
+
- "依赖关系清晰"
|
|
68
|
+
qualityMetrics:
|
|
69
|
+
- "子系统识别率 = 100%"
|
|
70
|
+
- "技术栈识别准确率 >= 95%"
|
|
71
|
+
maxDuration: 600
|
|
72
|
+
|
|
73
|
+
successCriteria:
|
|
74
|
+
passRate: 85
|
|
75
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Project-Evolve
|
|
2
|
+
# 适用于: 测试 project-evolve skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-project-evolve"
|
|
7
|
+
description: "Project-Evolve Skill 基准测试 - AI Native 项目自进化"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: project-evolve
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "init-memory"
|
|
14
|
+
input:
|
|
15
|
+
context: "初始化项目记忆"
|
|
16
|
+
complexity: 1
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "扫描项目技术栈"
|
|
19
|
+
- "检测项目结构"
|
|
20
|
+
- "自动发现评测点"
|
|
21
|
+
- "创建记忆目录结构"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- ".autospec/memory/index.yaml"
|
|
24
|
+
- "初始化报告"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "技术栈识别正确"
|
|
27
|
+
- "评测点发现 >= 10 个"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "记忆创建率 = 100%"
|
|
30
|
+
maxDuration: 300
|
|
31
|
+
|
|
32
|
+
- name: "cruise-mode"
|
|
33
|
+
input:
|
|
34
|
+
context: "智能巡航模式"
|
|
35
|
+
complexity: 3
|
|
36
|
+
expectedBehaviors:
|
|
37
|
+
- "加载 validated 记忆"
|
|
38
|
+
- "执行代码扫描"
|
|
39
|
+
- "智能优先级排序"
|
|
40
|
+
- "L1 自动修复"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "cruise-report.md"
|
|
43
|
+
- "auto-fixes.json"
|
|
44
|
+
successCriteria:
|
|
45
|
+
- "巡航执行完整"
|
|
46
|
+
- "自动修复正确"
|
|
47
|
+
qualityMetrics:
|
|
48
|
+
- "自动修复准确率 >= 90%"
|
|
49
|
+
maxDuration: 600
|
|
50
|
+
|
|
51
|
+
- name: "deep-evolution"
|
|
52
|
+
input:
|
|
53
|
+
context: "深度进化模式"
|
|
54
|
+
complexity: 5
|
|
55
|
+
expectedBehaviors:
|
|
56
|
+
- "六层全维度扫描"
|
|
57
|
+
- "关联分析"
|
|
58
|
+
- "生成改进方案"
|
|
59
|
+
- "区分 auto-fixable/manual-review"
|
|
60
|
+
expectedOutput:
|
|
61
|
+
- "deep-report.md"
|
|
62
|
+
- "improvements.md"
|
|
63
|
+
successCriteria:
|
|
64
|
+
- "六层扫描完整"
|
|
65
|
+
- "改进方案可执行"
|
|
66
|
+
qualityMetrics:
|
|
67
|
+
- "维度覆盖率 = 100%"
|
|
68
|
+
maxDuration: 1200
|
|
69
|
+
|
|
70
|
+
successCriteria:
|
|
71
|
+
passRate: 85
|
|
72
|
+
avgFieldCompletion: 90
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# AutoSpec Skill Benchmark Template - Project-Evolve-Verifier
|
|
2
|
+
# 适用于: 测试 project-evolve-verifier skill
|
|
3
|
+
# init 后复制到 .autospec/benchmarks/ 后按需修改
|
|
4
|
+
|
|
5
|
+
version: "1.0"
|
|
6
|
+
name: "skill-project-evolve-verifier"
|
|
7
|
+
description: "Project-Evolve-Verifier Skill 基准测试"
|
|
8
|
+
|
|
9
|
+
type: skill
|
|
10
|
+
target: project-evolve-verifier
|
|
11
|
+
|
|
12
|
+
testCases:
|
|
13
|
+
- name: "fix-verification"
|
|
14
|
+
input:
|
|
15
|
+
context: "验证修复效果"
|
|
16
|
+
complexity: 3
|
|
17
|
+
expectedBehaviors:
|
|
18
|
+
- "重新执行失败测试"
|
|
19
|
+
- "对比修复前后"
|
|
20
|
+
- "检测退化"
|
|
21
|
+
- "计算质量分数"
|
|
22
|
+
expectedOutput:
|
|
23
|
+
- "验证报告"
|
|
24
|
+
- "质量分数对比"
|
|
25
|
+
successCriteria:
|
|
26
|
+
- "验证执行完整"
|
|
27
|
+
- "退化检测正确"
|
|
28
|
+
qualityMetrics:
|
|
29
|
+
- "验证完整率 = 100%"
|
|
30
|
+
- "退化检出率 = 100%"
|
|
31
|
+
maxDuration: 450
|
|
32
|
+
|
|
33
|
+
- name: "regression-check"
|
|
34
|
+
input:
|
|
35
|
+
context: "回归检测"
|
|
36
|
+
complexity: 3
|
|
37
|
+
expectedBehaviors:
|
|
38
|
+
- "执行全量测试"
|
|
39
|
+
- "对比历史结果"
|
|
40
|
+
- "识别退化"
|
|
41
|
+
expectedOutput:
|
|
42
|
+
- "回归报告"
|
|
43
|
+
successCriteria:
|
|
44
|
+
- "回归检测完整"
|
|
45
|
+
qualityMetrics:
|
|
46
|
+
- "退化检出率 = 100%"
|
|
47
|
+
maxDuration: 600
|
|
48
|
+
|
|
49
|
+
successCriteria:
|
|
50
|
+
passRate: 90
|
|
51
|
+
avgFieldCompletion: 95
|