npm - @chongyan/autospec - Versions diffs - 1.0.1 → 1.0.2 - Mend

@chongyan/autospec 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (283) hide show

package/plugins/benchmarks/templates/commands/field-evolve-template.yaml ADDED Viewed

@@ -0,0 +1,104 @@
+# AutoSpec Command Benchmark Template - Field-Evolve
+# Applies to: testing the field-evolve command (self-evolution on real-world projects).
+# Copy to .autospec/benchmarks/ after init, then adjust as needed.
+#
+# Layout: template metadata, a testCases list (each case drives one --mode value),
+# and a file-level successCriteria mapping.
+# NOTE(review): the unit of maxDuration / avgDuration is not stated here -
+# presumably seconds; confirm against the benchmark runner.
+version: "1.0"
+name: "command-field-evolve"
+description: "Field-Evolve 命令基准测试"
+type: command
+target: field-evolve
+testCases:
+  # Quick mode: read config.json, then run the compile, unit-test and lint checks.
+  # Expected outputs are test-result.json and quick-report.md.
+  - name: "quick-mode"
+    input: "--mode=quick"
+    complexity: 1
+    expectedBehaviors:
+      - "读取 config.json"
+      - "执行编译检查"
+      - "执行单元测试"
+      - "执行 Lint 检查"
+    expectedOutput:
+      - "test-result.json"
+      - "quick-report.md"
+    successCriteria:
+      - "所有检查项执行"
+      - "测试结果记录完整"
+    qualityMetrics:
+      - "检查执行率 = 100%"
+    maxDuration: 300
+  # Deep mode: scan the benchmarks, run the benchmark scenarios and compute the
+  # three-dimension score. Expected outputs are deep-report.md and evaluation.json.
+  - name: "deep-mode"
+    input: "--mode=deep"
+    complexity: 5
+    expectedBehaviors:
+      - "扫描 benchmarks"
+      - "执行 benchmark 测试场景"
+      - "计算三维度评分"
+    expectedOutput:
+      - "deep-report.md"
+      - "evaluation.json"
+    successCriteria:
+      - "benchmark 执行完整"
+      - "三维度评分计算正确"
+    qualityMetrics:
+      - "benchmark 执行率 = 100%"
+    maxDuration: 1800
+  # Full cycle with --auto-fix: deep test, improvement plan, automatic fix,
+  # effect verification and skill distillation. Success also requires that no
+  # regression occurs.
+  - name: "full-cycle"
+    input: "--mode=full --auto-fix"
+    complexity: 5
+    expectedBehaviors:
+      - "执行深度测试"
+      - "生成改进方案"
+      - "执行自动修复"
+      - "效果验证"
+      - "技能蒸馏"
+    expectedOutput:
+      - "full-report.md"
+      - "distilled-skills/"
+    successCriteria:
+      - "完整循环执行成功"
+      - "无退化发生"
+    qualityMetrics:
+      - "修复有效率 >= 85%"
+    maxDuration: 2400
+  # Distill mode: read the evolution rules, collect practice-log, distill skills
+  # and verify the evolution effect.
+  - name: "distill-mode"
+    input: "--mode=distill"
+    complexity: 3
+    expectedBehaviors:
+      - "读取进化规则"
+      - "收集 practice-log"
+      - "执行技能蒸馏"
+      - "验证进化效果"
+    expectedOutput:
+      - "distill-report.md"
+      - "distilled-skills/"
+    successCriteria:
+      - "practice-log 完整读取"
+      - "技能提炼符合规范"
+    qualityMetrics:
+      - "技能规范率 >= 90%"
+    maxDuration: 600
+  # Generate mode: detect the project type, then generate a base benchmark and a
+  # pipeline benchmark.
+  - name: "generate-mode"
+    input: "--mode=generate"
+    complexity: 1
+    expectedBehaviors:
+      - "检测项目类型"
+      - "生成基础 benchmark"
+      - "生成 pipeline benchmark"
+    expectedOutput:
+      - "project-generated.yaml"
+      - "generate-report.md"
+    successCriteria:
+      - "项目类型识别正确"
+      - "benchmark 结构完整"
+    qualityMetrics:
+      - "类型识别率 = 100%"
+    maxDuration: 180
+# File-level thresholds. This top-level successCriteria is a mapping, whereas the
+# per-case successCriteria entries above are lists of free-text criteria.
+# NOTE(review): passRate is presumably a percentage - confirm.
+successCriteria:
+  passRate: 85
+  avgDuration: 600

package/plugins/benchmarks/templates/commands/project-evolve-template.yaml ADDED Viewed

@@ -0,0 +1,104 @@
+# AutoSpec Command Benchmark Template - Project-Evolve
+# Applies to: testing the project-evolve command (AI Native project self-evolution).
+# Copy to .autospec/benchmarks/ after init, then adjust as needed.
+#
+# Layout: template metadata, a testCases list (one case per command flag set),
+# and a file-level successCriteria mapping.
+# NOTE(review): the unit of maxDuration / avgDuration is not stated here -
+# presumably seconds; confirm against the benchmark runner.
+version: "1.0"
+name: "command-project-evolve"
+description: "Project-Evolve 命令基准测试"
+type: command
+target: project-evolve
+testCases:
+  # Memory initialisation: scan the tech stack, detect the project structure,
+  # auto-discover evaluation points and create the memory directory structure.
+  # Success requires at least 10 evaluation points to be discovered.
+  - name: "init-memory"
+    input: "--init-memory"
+    complexity: 1
+    expectedBehaviors:
+      - "扫描项目技术栈"
+      - "检测项目结构"
+      - "自动发现评测点"
+      - "创建记忆目录结构"
+    expectedOutput:
+      - ".autospec/memory/index.yaml"
+      - "初始化报告"
+    successCriteria:
+      - "技术栈识别正确"
+      - "评测点发现 >= 10 个"
+    qualityMetrics:
+      - "记忆创建率 = 100%"
+    maxDuration: 300
+  # Cruise mode: load validated memory, scan the code, prioritise findings and
+  # apply L1 automatic fixes.
+  - name: "cruise-mode"
+    input: "--cruise"
+    complexity: 3
+    expectedBehaviors:
+      - "加载 validated 记忆"
+      - "执行代码扫描"
+      - "智能优先级排序"
+      - "L1 自动修复"
+    expectedOutput:
+      - "cruise-report.md"
+      - "auto-fixes.json"
+    successCriteria:
+      - "巡航执行完整"
+      - "自动修复正确"
+    qualityMetrics:
+      - "自动修复准确率 >= 90%"
+    maxDuration: 600
+  # Deep evolution: six-layer full-dimension scan, correlation analysis and an
+  # improvement plan split into auto-fixable and manual-review items.
+  - name: "deep-evolution"
+    input: "--deep"
+    complexity: 5
+    expectedBehaviors:
+      - "六层全维度扫描"
+      - "关联分析"
+      - "生成改进方案"
+      - "区分 auto-fixable/manual-review"
+    expectedOutput:
+      - "deep-report.md"
+      - "improvements.md"
+    successCriteria:
+      - "六层扫描完整"
+      - "改进方案可执行"
+    qualityMetrics:
+      - "维度覆盖率 = 100%"
+    maxDuration: 1200
+  # Multi-system focus: identify every subsystem, check interface-contract
+  # consistency, look for shared logic worth extracting and detect dependency cycles.
+  - name: "multi-system-focus"
+    input: "--focus=multi"
+    complexity: 5
+    expectedBehaviors:
+      - "识别所有子系统"
+      - "检查接口契约一致性"
+      - "检测共享逻辑抽离机会"
+      - "检测依赖循环"
+    expectedOutput:
+      - "multi-system-report.md"
+      - "dependency-graph.md"
+    successCriteria:
+      - "子系统识别完整"
+      - "契约不一致问题检出"
+    qualityMetrics:
+      - "子系统识别率 = 100%"
+    maxDuration: 900
+  # Review mode over the last 7 days (--last=7d): read the execution history,
+  # compute metric trends and analyse which improvements were effective.
+  - name: "review-mode"
+    input: "--review --last=7d"
+    complexity: 3
+    expectedBehaviors:
+      - "读取历史执行记录"
+      - "统计指标变化趋势"
+      - "分析有效改进"
+    expectedOutput:
+      - "review-report.md"
+      - "metrics-trend.md"
+    successCriteria:
+      - "历史数据完整读取"
+      - "趋势分析准确"
+    qualityMetrics:
+      - "趋势分析准确率 >= 90%"
+    maxDuration: 450
+# File-level thresholds. This top-level successCriteria is a mapping, whereas the
+# per-case successCriteria entries above are lists of free-text criteria.
+# NOTE(review): passRate is presumably a percentage - confirm.
+successCriteria:
+  passRate: 85
+  avgDuration: 600

package/plugins/benchmarks/templates/commands/propose-template.yaml ADDED Viewed

@@ -0,0 +1,88 @@
+# AutoSpec Command Benchmark Template - Propose
+# Applies to: testing the propose command (solution design).
+# Copy to .autospec/benchmarks/ after init, then adjust as needed.
+#
+# Layout: template metadata, a testCases list (one case per design scenario),
+# and a file-level successCriteria mapping.
+# Strings in braces such as {feature} are placeholders.
+# NOTE(review): inputs use {feature-name} / {model-type} while artifact paths use
+# {feature}; confirm whether these are substituted by a tool or edited by hand.
+# NOTE(review): the unit of maxDuration / avgDuration is not stated here -
+# presumably seconds; confirm against the benchmark runner.
+version: "1.0"
+name: "command-propose"
+description: "Propose 命令基准测试"
+type: command
+target: propose
+testCases:
+  # Simple design: read the requirement document, then design the API and the
+  # database tables. A single design.md artifact is expected.
+  - name: "simple-design"
+    input: "设计 {feature-name} 功能的方案"
+    complexity: 1
+    expectedArtifacts:
+      - "specs/{feature}/design.md"
+    expectedBehaviors:
+      - "读取需求文档"
+      - "设计 API 接口"
+      - "设计数据库表"
+    successCriteria:
+      - "设计文档存在"
+      - "包含 API 设计"
+      - "包含数据库设计"
+    qualityMetrics:
+      - "API 设计完整率 >= 90%"
+      - "数据库设计合理率 >= 90%"
+    maxDuration: 300
+  # Multi-system design: overall architecture, an independent design per
+  # subsystem, and the interfaces between systems (contracts/api.yaml).
+  - name: "multi-system-design"
+    input: "设计 {feature} 功能，包含 {subsystems}"
+    complexity: 5
+    expectedArtifacts:
+      - "specs/{feature}/design/overview.md"
+      - "specs/{feature}/design/backend.md"
+      - "specs/{feature}/design/frontend.md"
+      - "contracts/api.yaml"
+    expectedBehaviors:
+      - "设计整体架构"
+      - "各子系统独立设计"
+      - "定义系统间接口"
+    successCriteria:
+      - "整体架构清晰"
+      - "每个子系统有独立设计"
+      - "系统间接口定义完整"
+    qualityMetrics:
+      - "架构合理率 >= 90%"
+      - "接口完整率 >= 90%"
+    maxDuration: 600
+  # AI feature design: model selection, training flow and an evaluation plan
+  # (design.md plus evaluation.md).
+  - name: "ai-design"
+    input: "设计 AI 功能方案，使用 {model-type}"
+    complexity: 5
+    expectedArtifacts:
+      - "specs/{feature}/design.md"
+      - "specs/{feature}/evaluation.md"
+    expectedBehaviors:
+      - "设计模型选型"
+      - "设计训练流程"
+      - "设计效果评估方案"
+    successCriteria:
+      - "模型选型合理"
+      - "效果评估方案完整"
+    qualityMetrics:
+      - "技术选型合理率 >= 90%"
+      - "评估方案完整率 >= 90%"
+    maxDuration: 600
+  # Security-sensitive design (payment system): identify security risks, design
+  # the security policy and the audit log.
+  # NOTE(review): unlike the other cases, no expectedArtifacts are listed here -
+  # confirm whether that is intentional.
+  - name: "security-sensitive-design"
+    input: "设计支付系统方案"
+    complexity: 5
+    expectedBehaviors:
+      - "识别安全风险"
+      - "设计安全策略"
+      - "设计审计日志"
+    successCriteria:
+      - "安全风险识别完整"
+      - "安全策略合理"
+      - "审计日志设计完整"
+    qualityMetrics:
+      - "安全考虑完整率 >= 90%"
+      # Compliance is all-or-nothing: a rate cannot exceed 100%, so the bound is
+      # written as an equality rather than ">= 100%".
+      - "合规性 = 100%"
+    maxDuration: 600
+# File-level thresholds. This top-level successCriteria is a mapping, whereas the
+# per-case successCriteria entries above are lists of free-text criteria.
+# NOTE(review): passRate is presumably a percentage - confirm.
+successCriteria:
+  passRate: 90
+  avgDuration: 450

package/plugins/benchmarks/templates/commands/review-template.yaml ADDED Viewed

@@ -0,0 +1,124 @@
+# AutoSpec Command Benchmark Template - Review
+# Applies to: testing the review command (reviewing deliverables).
+# Copy to .autospec/benchmarks/ after init, then adjust as needed.
+#
+# Layout: template metadata, a testCases list (one case per review target),
+# and a file-level successCriteria mapping.
+# Strings in braces such as {feature}, {org}, {repo} and {branch} are placeholders.
+# NOTE(review): the unit of maxDuration / avgDuration is not stated here -
+# presumably seconds; confirm against the benchmark runner.
+version: "1.0"
+name: "command-review"
+description: "Review 命令基准测试"
+type: command
+target: review
+testCases:
+  # Requirement review: the report should contain a conclusion, an item-by-item
+  # verdict table, a list of blocking issues and non-blocking suggestions.
+  - name: "requirement-review"
+    input: "review requirement ./specs/{feature}/requirement.md"
+    complexity: 1
+    expectedOutput:
+      - "审查结论"
+      - "逐项判定表"
+      - "blocking 问题清单"
+      - "non-blocking 建议"
+    successCriteria:
+      - "审查报告结构完整"
+      - "逐项判定有证据支撑"
+    qualityMetrics:
+      - "问题检出率 >= 90%"
+      - "误报率 <= 10%"
+    maxDuration: 300
+  # Design review: verdicts on architecture, API design, database design and
+  # security considerations.
+  - name: "design-review"
+    input: "review design ./specs/{feature}/design.md"
+    complexity: 3
+    expectedOutput:
+      - "审查结论"
+      - "架构设计判定"
+      - "API 设计判定"
+      - "数据库设计判定"
+      - "安全考虑判定"
+    successCriteria:
+      - "各设计维度审查完整"
+      - "设计问题识别准确"
+    qualityMetrics:
+      - "设计问题检出率 >= 85%"
+      - "建议合理率 >= 90%"
+    maxDuration: 450
+  # Code review of ./src/: quality score plus security and performance findings.
+  - name: "code-review"
+    input: "review code ./src/"
+    complexity: 3
+    expectedOutput:
+      - "审查结论"
+      - "代码质量评分"
+      - "安全问题检测"
+      - "性能问题检测"
+    successCriteria:
+      - "代码审查维度完整"
+      - "安全漏洞识别准确"
+    qualityMetrics:
+      - "代码问题检出率 >= 85%"
+      - "安全漏洞检出率 >= 90%"
+    maxDuration: 600
+  # Test review of ./tests/: coverage analysis plus boundary-condition and
+  # error-path checks.
+  - name: "test-review"
+    input: "review test ./tests/"
+    complexity: 3
+    expectedOutput:
+      - "审查结论"
+      - "测试覆盖分析"
+      - "边界条件检查"
+      - "异常路径检查"
+    successCriteria:
+      - "测试覆盖分析准确"
+      - "遗漏测试识别有效"
+    qualityMetrics:
+      - "测试遗漏检出率 >= 85%"
+      - "覆盖率评估准确率 >= 90%"
+    maxDuration: 450
+  # Remote repository review: clone the git repository, check out the requested
+  # branch, review the code and produce a report.
+  - name: "git-repo-review"
+    input: "review code https://github.com/{org}/{repo} --branch {branch}"
+    complexity: 5
+    expectedBehaviors:
+      - "自动克隆 git 仓库"
+      - "指定分支检出"
+      - "执行代码审查"
+      - "生成审查报告"
+    successCriteria:
+      - "git 克隆成功"
+      - "分支检出正确"
+      - "审查报告完整"
+    qualityMetrics:
+      - "仓库审查成功率 = 100%"
+      - "问题检出率 >= 80%"
+    maxDuration: 900
+  # Auto review: the input is deliberately an empty string; the command is
+  # expected to read state.json for the current stage and infer both the review
+  # type and the review target.
+  - name: "auto-review"
+    input: ""
+    complexity: 1
+    expectedBehaviors:
+      - "读取 state.json 确定当前阶段"
+      - "自动推断审查类型"
+      - "自动定位审查目标"
+    successCriteria:
+      - "自动推断正确"
+      - "审查目标定位准确"
+    qualityMetrics:
+      - "自动推断准确率 >= 95%"
+    maxDuration: 300
+  # Multi-system review: recognise the multi-system configuration and review only
+  # the subsystem selected with --subsystem=backend.
+  - name: "multi-system-review"
+    input: "review code ./src/ --subsystem=backend"
+    complexity: 5
+    expectedBehaviors:
+      - "识别多系统配置"
+      - "审查指定子系统"
+    successCriteria:
+      - "子系统识别正确"
+      - "审查范围准确"
+    qualityMetrics:
+      - "子系统审查准确率 = 100%"
+    maxDuration: 600
+# File-level thresholds. This top-level successCriteria is a mapping, whereas the
+# per-case successCriteria entries above are lists of free-text criteria.
+# NOTE(review): passRate is presumably a percentage - confirm.
+successCriteria:
+  passRate: 90
+  avgDuration: 400

package/plugins/benchmarks/templates/commands/run-template.yaml ADDED Viewed

@@ -0,0 +1,127 @@
+# AutoSpec Command Benchmark Template - Run
+# Applies to: testing the run command (full workflow).
+# Copy to .autospec/benchmarks/ after init, then adjust as needed.
+#
+# Layout: template metadata, a testCases list (one case per --workflow value,
+# plus a multi-system waterfall case), and a file-level successCriteria mapping.
+# Each case lists the stages expected to run (expectedStages) and the artifacts
+# expected to be produced (expectedArtifacts).
+# Strings in braces such as {feature} are placeholders.
+# NOTE(review): inputs use {feature-name} while artifact paths use {feature};
+# confirm whether these are substituted by a tool or edited by hand.
+# NOTE(review): the unit of maxDuration / avgDuration is not stated here -
+# presumably seconds; confirm against the benchmark runner.
+version: "1.0"
+name: "command-run"
+description: "Run 命令基准测试 - 启动完整流程"
+type: command
+target: run
+testCases:
+  # Waterfall workflow: all 5 stages must run, and each stage must pass
+  # Layer1 + Layer2 validation.
+  - name: "waterfall-simple"
+    input: "实现 {feature-name} --workflow=waterfall"
+    complexity: 1
+    expectedStages:
+      - exploration
+      - design
+      - implement
+      - test
+      - deliver
+    expectedArtifacts:
+      - "specs/{feature}/requirement.md"
+      - "specs/{feature}/design.md"
+      - "src/ 源代码"
+      - "tests/ 测试代码"
+      - "release-notes.md"
+    successCriteria:
+      - "完整执行 5 个阶段"
+      - "每个阶段通过 Layer1+Layer2 验证"
+      - "产出物完整"
+    qualityMetrics:
+      - "阶段完成率 = 100%"
+      - "Layer1 通过率 >= 90%"
+      - "Layer2 通过率 >= 80%"
+    maxDuration: 3600
+  # Agile workflow: user stories must be split sensibly and the integration
+  # tests must pass.
+  - name: "agile-iteration"
+    input: "实现 {feature-name} --workflow=agile"
+    complexity: 3
+    expectedStages:
+      - exploration
+      - story-iterations
+      - integration-test
+      - deliver
+    expectedArtifacts:
+      - "specs/{feature}/requirement.md"
+      - "specs/{feature}/stories.md"
+      - "src/ 源代码"
+      - "tests/ 测试代码"
+    successCriteria:
+      - "用户故事拆分合理"
+      - "集成测试通过"
+    qualityMetrics:
+      - "故事完成率 >= 90%"
+      - "集成测试通过率 = 100%"
+    maxDuration: 1800
+  # Experiment workflow: assumptions must be clearly defined and verifiable, and
+  # the effect evaluation must run automatically.
+  - name: "experiment-mode"
+    input: "实现 {feature-name} --workflow=experiment"
+    complexity: 5
+    expectedStages:
+      - exploration
+      - design
+      - prototype-implement
+      - evaluation
+      - assumption-validation
+    expectedArtifacts:
+      - "specs/{feature}/assumptions.md"
+      - "specs/{feature}/evaluation-plan.md"
+      - "specs/{feature}/evaluation-report.md"
+    successCriteria:
+      - "假设定义清晰可验证"
+      - "效果评测自动执行"
+    qualityMetrics:
+      - "假设完整率 >= 90%"
+      - "效果达标率 >= 80%"
+    maxDuration: 2700
+  # Hotfix workflow: accurate diagnosis, minimal change, passing regression tests.
+  # The quality metric asks for a fix time under 15 minutes; maxDuration 900
+  # would equal 15 minutes if the unit is seconds - confirm.
+  - name: "hotfix-mode"
+    input: "修复 {bug-description} --workflow=hotfix"
+    complexity: 1
+    expectedStages:
+      - diagnosis
+      - risk-assessment
+      - quick-fix
+      - quick-review
+      - quick-deploy
+    expectedArtifacts:
+      - "specs/hotfix-{id}/hotfix-issue.md"
+      - "risk-assessment.md"
+      - "变更代码"
+      - "回归测试"
+    successCriteria:
+      - "问题定位准确"
+      - "变更最小化"
+      - "回归测试通过"
+    qualityMetrics:
+      - "修复耗时 < 15 分钟"
+    maxDuration: 900
+  # Multi-system waterfall: every subsystem must be identified, coded in the
+  # correct order, and the integration tests must pass.
+  # NOTE(review): the input names backend/frontend/mobile, but expectedArtifacts
+  # lists design documents only for backend and frontend - confirm whether a
+  # mobile design document is expected as well.
+  - name: "multi-system-waterfall"
+    input: "实现 {feature}，包含后端/前端/移动端 --workflow=waterfall"
+    complexity: 5
+    expectedStages:
+      - exploration
+      - design
+      - implement
+      - test
+      - deliver
+    expectedArtifacts:
+      - "specs/{feature}/design/overview.md"
+      - "specs/{feature}/design/backend.md"
+      - "specs/{feature}/design/frontend.md"
+      - "contracts/api.yaml"
+    successCriteria:
+      - "识别所有子系统"
+      - "编码顺序正确"
+      - "集成测试通过"
+    qualityMetrics:
+      - "子系统识别率 = 100%"
+      - "依赖顺序正确率 = 100%"
+    maxDuration: 5400
+# File-level thresholds. This top-level successCriteria is a mapping, whereas the
+# per-case successCriteria entries above are lists of free-text criteria.
+# NOTE(review): passRate is presumably a percentage - confirm.
+successCriteria:
+  passRate: 85
+  avgDuration: 1800

package/plugins/benchmarks/templates/commands/test-template.yaml ADDED Viewed

@@ -0,0 +1,149 @@
+# AutoSpec Command Benchmark Template - Test
+# Applies to: testing the test command (validation + fix + evaluation).
+# Copy to .autospec/benchmarks/ after init, then adjust as needed.
+#
+# Layout: template metadata, a testCases list (one case per flag combination of
+# /autospec:test), and a file-level successCriteria mapping.
+# Strings in braces such as {org} and {repo} are placeholders.
+# NOTE(review): the unit of maxDuration / avgDuration is not stated here -
+# presumably seconds; confirm against the benchmark runner.
+version: "1.0"
+name: "command-test"
+description: "Test 命令基准测试 - 统一测试命令"
+type: command
+target: test
+testCases:
+  # Validation only (no flags): compile check, tests, lint and type check, with a
+  # validation report as output.
+  - name: "validation-only"
+    input: "/autospec:test"
+    complexity: 1
+    expectedBehaviors:
+      - "执行编译检查"
+      - "执行测试"
+      - "执行 Lint 检查"
+      - "执行类型检查"
+    expectedOutput:
+      - "验证报告"
+      - "各项验证结果"
+    successCriteria:
+      - "验证步骤完整"
+      - "结果报告清晰"
+    qualityMetrics:
+      - "验证执行率 = 100%"
+      - "报告完整率 >= 95%"
+    maxDuration: 300
+  # Validation with --fix: Layer1 validation, problem analysis, fix
+  # classification, applying the fixes and verifying the effect. Regressions
+  # must be detected.
+  - name: "validation-with-fix"
+    input: "/autospec:test --fix"
+    complexity: 3
+    expectedBehaviors:
+      - "执行 Layer1 验证"
+      - "问题分析"
+      - "修复分类"
+      - "执行修复"
+      - "效果验证"
+    expectedOutput:
+      - "验证报告"
+      - "问题分析报告"
+      - "修复记录"
+    successCriteria:
+      - "问题分类准确"
+      - "修复有效"
+      - "退化检测正确"
+    qualityMetrics:
+      - "修复有效率 >= 85%"
+      - "退化检出率 = 100%"
+    maxDuration: 600
+  # Subsystem test: read the subsystem configuration from config.json, then
+  # validate and fix only the subsystem selected with --subsystem=backend.
+  - name: "subsystem-test"
+    input: "/autospec:test --subsystem=backend --fix"
+    complexity: 3
+    expectedBehaviors:
+      - "读取 config.json 获取子系统配置"
+      - "只验证指定子系统"
+      - "执行子系统修复"
+    successCriteria:
+      - "子系统识别正确"
+      - "验证范围准确"
+    qualityMetrics:
+      - "子系统验证准确率 = 100%"
+    maxDuration: 450
+  # AI evaluation scope: detect AI/model components, load the evaluation dataset,
+  # run the effect evaluation and produce a report with metrics and bad-case analysis.
+  - name: "ai-evaluation"
+    input: "/autospec:test --scope=evaluation"
+    complexity: 5
+    expectedBehaviors:
+      - "检测 AI/模型组件"
+      - "加载评测数据集"
+      - "执行效果评测"
+      - "生成评测报告"
+    expectedOutput:
+      - "评测报告"
+      - "指标结果"
+      - "Badcase 分析"
+    successCriteria:
+      - "评测方案执行完整"
+      - "指标计算准确"
+    qualityMetrics:
+      - "评测完整率 = 100%"
+      - "Badcase 检出率 >= 90%"
+    maxDuration: 900
+  # Remote repository test: clone the git repository, check out the main branch
+  # and run the validation.
+  - name: "git-repo-test"
+    input: "/autospec:test https://github.com/{org}/{repo} --branch main"
+    complexity: 5
+    expectedBehaviors:
+      - "克隆 git 仓库"
+      - "检出指定分支"
+      - "执行验证"
+    successCriteria:
+      - "git 克隆成功"
+      - "验证执行完整"
+    qualityMetrics:
+      - "仓库测试成功率 >= 95%"
+    maxDuration: 600
+  # Deep analysis with --deep --fix: full validation, in-depth problem analysis,
+  # root-cause identification and comprehensive fixes.
+  - name: "deep-analysis"
+    input: "/autospec:test --deep --fix"
+    complexity: 5
+    expectedBehaviors:
+      - "执行完整验证"
+      - "深度问题分析"
+      - "根因识别"
+      - "全面修复"
+    expectedOutput:
+      - "深度分析报告"
+      - "根因分析"
+      - "质量评分"
+    successCriteria:
+      - "根因识别准确"
+      - "质量提升明显"
+    qualityMetrics:
+      - "根因识别率 >= 90%"
+      - "质量提升 >= 10%"
+    maxDuration: 900
+  # Unit scope: only unit tests run; integration tests are skipped.
+  - name: "unit-test-scope"
+    input: "/autospec:test --scope=unit"
+    complexity: 1
+    expectedBehaviors:
+      - "只执行单元测试"
+      - "跳过集成测试"
+    successCriteria:
+      - "测试范围准确"
+    qualityMetrics:
+      - "单元测试覆盖率 >= 95%"
+    maxDuration: 180
+  # Integration scope with --fix: run the integration tests, then identify and
+  # fix integration problems.
+  - name: "integration-test-scope"
+    input: "/autospec:test --scope=integration --fix"
+    complexity: 3
+    expectedBehaviors:
+      - "执行集成测试"
+      - "识别集成问题"
+      - "修复集成问题"
+    successCriteria:
+      - "集成测试执行完整"
+    qualityMetrics:
+      - "集成测试覆盖率 = 100%"
+    maxDuration: 450
+# File-level thresholds. This top-level successCriteria is a mapping, whereas the
+# per-case successCriteria entries above are lists of free-text criteria.
+# NOTE(review): passRate is presumably a percentage - confirm.
+successCriteria:
+  passRate: 90
+  avgDuration: 450