@winspan/claude-forge 8.53.2 → 8.54.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DEVELOPMENT.md +290 -221
- package/README.md +50 -8
- package/dist/cli/commands/skills.d.ts.map +1 -1
- package/dist/cli/commands/skills.js +7 -3
- package/dist/cli/commands/skills.js.map +1 -1
- package/dist/cli/init/hook-manager.d.ts +1 -1
- package/dist/cli/init/hook-manager.d.ts.map +1 -1
- package/dist/cli/init/hook-manager.js +1 -0
- package/dist/cli/init/hook-manager.js.map +1 -1
- package/dist/core/storage/events.d.ts.map +1 -1
- package/dist/core/storage/events.js +0 -1
- package/dist/core/storage/events.js.map +1 -1
- package/dist/core/storage/maintenance.d.ts +25 -3
- package/dist/core/storage/maintenance.d.ts.map +1 -1
- package/dist/core/storage/maintenance.js +33 -4
- package/dist/core/storage/maintenance.js.map +1 -1
- package/dist/core/storage/routing.d.ts +4 -0
- package/dist/core/storage/routing.d.ts.map +1 -1
- package/dist/core/storage/routing.js +10 -4
- package/dist/core/storage/routing.js.map +1 -1
- package/dist/core/storage/sessions.d.ts +17 -0
- package/dist/core/storage/sessions.d.ts.map +1 -1
- package/dist/core/storage/sessions.js +64 -0
- package/dist/core/storage/sessions.js.map +1 -1
- package/dist/core/storage/skills.d.ts +4 -0
- package/dist/core/storage/skills.d.ts.map +1 -1
- package/dist/core/storage/skills.js +10 -2
- package/dist/core/storage/skills.js.map +1 -1
- package/dist/core/storage/sqlite.d.ts +5 -0
- package/dist/core/storage/sqlite.d.ts.map +1 -1
- package/dist/core/storage/sqlite.js +6 -0
- package/dist/core/storage/sqlite.js.map +1 -1
- package/dist/core/storage/tasks.d.ts.map +1 -1
- package/dist/core/storage/tasks.js +2 -0
- package/dist/core/storage/tasks.js.map +1 -1
- package/dist/core/types.d.ts +7 -0
- package/dist/core/types.d.ts.map +1 -1
- package/dist/daemon/index.d.ts.map +1 -1
- package/dist/daemon/index.js +19 -4
- package/dist/daemon/index.js.map +1 -1
- package/dist/skills/registry.d.ts.map +1 -1
- package/dist/skills/registry.js +13 -2
- package/dist/skills/registry.js.map +1 -1
- package/dist/skills/semantic-matcher.d.ts +2 -2
- package/dist/skills/semantic-matcher.d.ts.map +1 -1
- package/dist/skills/semantic-matcher.js +14 -19
- package/dist/skills/semantic-matcher.js.map +1 -1
- package/dist/skills/upgrade-engine.d.ts +3 -1
- package/dist/skills/upgrade-engine.d.ts.map +1 -1
- package/dist/skills/upgrade-engine.js +25 -14
- package/dist/skills/upgrade-engine.js.map +1 -1
- package/dist/web/analytics/weekly-report.d.ts.map +1 -1
- package/dist/web/analytics/weekly-report.js +21 -29
- package/dist/web/analytics/weekly-report.js.map +1 -1
- package/dist/web/routes/patch.d.ts.map +1 -1
- package/dist/web/routes/patch.js +32 -2
- package/dist/web/routes/patch.js.map +1 -1
- package/dist/web/routes/sessions.d.ts.map +1 -1
- package/dist/web/routes/sessions.js +9 -7
- package/dist/web/routes/sessions.js.map +1 -1
- package/dist/web/routes/trace.d.ts.map +1 -1
- package/dist/web/routes/trace.js +2 -3
- package/dist/web/routes/trace.js.map +1 -1
- package/dist/web/server.d.ts.map +1 -1
- package/dist/web/server.js +3 -2
- package/dist/web/server.js.map +1 -1
- package/package.json +12 -2
- package/scripts/postinstall.cjs +21 -0
- package/.claude/CLAUDE.md +0 -17
- package/.eslintrc.js +0 -23
- package/.prettierrc +0 -8
- package/ARCHITECTURE_ISSUES.md +0 -249
- package/CLAUDE.md +0 -265
- package/CLAUDE.md.backup +0 -488
- package/docs/concurrent-agents.md +0 -129
- package/docs/design/architecture-review-20260516.md +0 -232
- package/docs/design/fix-skills-data-and-set-leak-spec-20260516-1300.md +0 -219
- package/docs/design/h1-storage-aggregation-spec-20260518-1121.md +0 -299
- package/docs/design/h2-getdatabase-encapsulation-spec-20260518-1450.md +0 -191
- package/docs/design/h3-fallback-removal-spec-20260518-1245.md +0 -76
- package/docs/design/h4-index-dedup-spec-20260518-1230.md +0 -109
- package/docs/design/h6-services-migration-spec-20260518-1355.md +0 -82
- package/docs/design/hook-failure-queue-spec-20260516-1530.md +0 -204
- package/docs/design/l1-swarm-protocol-extract-spec-20260518-1605.md +0 -106
- package/docs/design/m10-forge-paths-spec-20260518-1320.md +0 -121
- package/docs/design/m2-m3-tool-input-spec-20260518-1425.md +0 -131
- package/docs/design/m7-routing-event-association-spec-20260518-1545.md +0 -103
- package/docs/design/project-path-gitroot-spec-20260518-1715.md +0 -134
- package/docs/design/refactor-phase1-spec-20260515-1600.md +0 -543
- package/docs/design/refactor-phase2-spec-20260515-1700.md +0 -424
- package/docs/design/skill-ai-upgrade-spec-20260518-1930.md +0 -297
- package/docs/design/task-active-gc-spec-20260518-1745.md +0 -146
- package/docs/design/tasks-list-filter-pagination-spec-20260518-0930.md +0 -208
- package/docs/implementation/daemon-skill-sync-changelog-20260518-2000.md +0 -22
- package/docs/implementation/fix-skills-data-and-set-leak-changelog-20260516-1300.md +0 -104
- package/docs/implementation/h1-storage-aggregation-changelog-20260518-1121.md +0 -82
- package/docs/implementation/h2-final-changelog-20260518-1530.md +0 -61
- package/docs/implementation/h2-phase1-safety-net-changelog-20260518-1450.md +0 -70
- package/docs/implementation/h2-phase2-operations-changelog-20260518-1450.md +0 -120
- package/docs/implementation/h2-phase3-callsites-changelog-20260518-1450.md +0 -71
- package/docs/implementation/h3-fallback-removal-changelog-20260518-1245.md +0 -71
- package/docs/implementation/h4-index-dedup-changelog-20260518-1230.md +0 -60
- package/docs/implementation/h6-services-migration-changelog-20260518-1355.md +0 -46
- package/docs/implementation/h7-m9-defaults-changelog-20260518-1300.md +0 -46
- package/docs/implementation/hook-failure-queue-changelog-20260516-1530.md +0 -196
- package/docs/implementation/hotfix-daemon-event-reject-20260516-1430.md +0 -56
- package/docs/implementation/l1-swarm-protocol-extract-changelog-20260518-1605.md +0 -45
- package/docs/implementation/l3-l4-daemon-perf-changelog-20260518-1410.md +0 -63
- package/docs/implementation/l6-l8-final-cleanup-changelog-20260518-1640.md +0 -38
- package/docs/implementation/m1-m4-m5-l7-cleanup-changelog-20260518-1310.md +0 -58
- package/docs/implementation/m10-forge-paths-changelog-20260518-1320.md +0 -60
- package/docs/implementation/m2-m3-tool-input-changelog-20260518-1425.md +0 -43
- package/docs/implementation/m6-m8-naming-shutdown-changelog-20260518-1340.md +0 -56
- package/docs/implementation/m7-routing-association-changelog-20260518-1545.md +0 -69
- package/docs/implementation/project-path-gitroot-changelog-20260518-1715.md +0 -63
- package/docs/implementation/refactor-phase1-changelog-20260515-1630.md +0 -354
- package/docs/implementation/refactor-phase2-changelog-20260515-1705.md +0 -421
- package/docs/implementation/skill-ai-upgrade-changelog-20260518-1930.md +0 -49
- package/docs/implementation/task-active-gc-changelog-20260518-1745.md +0 -35
- package/docs/implementation/task-title-summary-changelog-20260518-1130.md +0 -39
- package/docs/implementation/tasks-detail-back-loses-filters-changelog-20260518-1100.md +0 -22
- package/docs/implementation/tasks-list-filter-pagination-changelog-20260518-0930.md +0 -72
- package/docs/implementation/tasks-page-white-screen-hotfix-changelog-20260518-1015.md +0 -56
- package/docs/reviews/claudemd-template-sync.md +0 -54
- package/docs/reviews/task-title-summary.md +0 -92
- package/docs/reviews/tasks-detail-back-loses-filters.md +0 -58
- package/docs/reviews/tasks-filter-pagination.md +0 -80
- package/docs/reviews/tasks-page-white-screen-hotfix.md +0 -126
- package/docs/ruflo-learning-strategy.md +0 -322
- package/docs/skills-deduplication-analysis.md +0 -83
- package/docs/skills-multiformat-support.md +0 -177
- package/docs/skills-third-party.md +0 -183
- package/docs/testing/tasks-filter-pagination-test-report.md +0 -86
- package/forge +0 -321
- package/playwright.config.ts +0 -40
- package/scripts/demo-v2.ts +0 -91
- package/scripts/dev-daemon.sh +0 -232
- package/scripts/dev-web.ts +0 -109
- package/scripts/e2e-mcp-link.ts +0 -423
- package/scripts/e2e-methodology-quality.ts +0 -253
- package/scripts/e2e-routing.ts +0 -456
- package/scripts/e2e-user-methodology.ts +0 -326
- package/scripts/e2e-web-workflows.ts +0 -299
- package/scripts/migrate-legacy-to-dynamic.sql +0 -108
- package/scripts/regenerate-execution-docs.ts +0 -116
- package/scripts/sync-agent-skills.ts +0 -193
- package/scripts/test-hook.sh +0 -71
- package/scripts/verify-skill-loading.ts +0 -62
- package/src/claudemd/claudemd-generator.ts +0 -568
- package/src/claudemd/convention-extractor.ts +0 -69
- package/src/claudemd/index.ts +0 -35
- package/src/claudemd/persona-manager.ts +0 -88
- package/src/claudemd/resume-manager.ts +0 -236
- package/src/claudemd/tech-detector.ts +0 -220
- package/src/claudemd/templates/swarm-protocol.md +0 -222
- package/src/cli/commands/claudemd.ts +0 -84
- package/src/cli/commands/config.ts +0 -46
- package/src/cli/commands/daemon.ts +0 -310
- package/src/cli/commands/executions.ts +0 -115
- package/src/cli/commands/init.ts +0 -204
- package/src/cli/commands/logs.ts +0 -181
- package/src/cli/commands/mcp.ts +0 -242
- package/src/cli/commands/menu.ts +0 -357
- package/src/cli/commands/skills.ts +0 -328
- package/src/cli/commands/stats.ts +0 -73
- package/src/cli/commands/status.ts +0 -69
- package/src/cli/commands/template.ts +0 -77
- package/src/cli/commands/trace.ts +0 -148
- package/src/cli/index.ts +0 -42
- package/src/cli/init/hook-manager.ts +0 -132
- package/src/core/ai/provider.ts +0 -308
- package/src/core/ai/types.ts +0 -51
- package/src/core/config.ts +0 -124
- package/src/core/constants.ts +0 -67
- package/src/core/event-fields.ts +0 -32
- package/src/core/queue/index.ts +0 -192
- package/src/core/storage/base.ts +0 -302
- package/src/core/storage/events.ts +0 -434
- package/src/core/storage/injections.ts +0 -78
- package/src/core/storage/maintenance.ts +0 -59
- package/src/core/storage/migrations/002_add_skill_tracking.sql +0 -6
- package/src/core/storage/migrations/003_add_skill_invocations.sql +0 -23
- package/src/core/storage/performance-indexes.sql +0 -23
- package/src/core/storage/routing.ts +0 -322
- package/src/core/storage/rows.ts +0 -112
- package/src/core/storage/schema.sql +0 -224
- package/src/core/storage/sessions.ts +0 -168
- package/src/core/storage/skills.ts +0 -233
- package/src/core/storage/sqlite.ts +0 -293
- package/src/core/storage/tasks.ts +0 -318
- package/src/core/storage/token-usage.ts +0 -93
- package/src/core/types.ts +0 -181
- package/src/core/utils/error-handler.ts +0 -257
- package/src/core/utils/forge-resume-block.ts +0 -74
- package/src/core/utils/format.ts +0 -69
- package/src/core/utils/git.ts +0 -23
- package/src/core/utils/logger.ts +0 -134
- package/src/core/utils/lru-cache.ts +0 -54
- package/src/core/utils/path.ts +0 -19
- package/src/core/utils/session.ts +0 -26
- package/src/core/utils/time.ts +0 -37
- package/src/core/utils/token-tracker.ts +0 -97
- package/src/daemon/event-parser.ts +0 -36
- package/src/daemon/handlers/history-exporter.ts +0 -117
- package/src/daemon/handlers/post-tool-use.ts +0 -54
- package/src/daemon/handlers/stop.ts +0 -208
- package/src/daemon/handlers/user-prompt.ts +0 -178
- package/src/daemon/hook-sync.ts +0 -91
- package/src/daemon/index.ts +0 -312
- package/src/daemon/launchd/com.claude-forge.daemon.plist.template +0 -47
- package/src/daemon/launchd-installer.ts +0 -260
- package/src/daemon/lifecycle.ts +0 -128
- package/src/daemon/router.ts +0 -40
- package/src/daemon/server.ts +0 -196
- package/src/daemon/services/task-segmenter.ts +0 -112
- package/src/daemon/skill-sync.ts +0 -88
- package/src/hooks/hook-lib.sh +0 -118
- package/src/hooks/notification.sh +0 -35
- package/src/hooks/post-tool-use.sh +0 -61
- package/src/hooks/pre-tool-use.sh +0 -63
- package/src/hooks/stop.sh +0 -43
- package/src/hooks/user-prompt-submit.sh +0 -69
- package/src/mcp/server.ts +0 -322
- package/src/skills/index.ts +0 -2
- package/src/skills/invocation-guard.ts +0 -177
- package/src/skills/matcher.ts +0 -148
- package/src/skills/official/code-simplifier.md +0 -52
- package/src/skills/official/find-skills.md +0 -142
- package/src/skills/official/official-api-design.md +0 -30
- package/src/skills/official/official-architecture-decision.md +0 -41
- package/src/skills/official/official-bmad.md +0 -118
- package/src/skills/official/official-db-schema-design.md +0 -34
- package/src/skills/official/official-debug.md +0 -25
- package/src/skills/official/official-doc-driven.md +0 -31
- package/src/skills/official/official-harness-engineering.md +0 -108
- package/src/skills/official/official-performance-optimization.md +0 -30
- package/src/skills/official/official-pr-review.md +0 -35
- package/src/skills/official/official-release-checklist.md +0 -30
- package/src/skills/official/official-security-hardening.md +0 -32
- package/src/skills/official/official-spec-driven-design.md +0 -31
- package/src/skills/official/planning-with-files.md +0 -241
- package/src/skills/official/ui-ux-pro-max.md +0 -105
- package/src/skills/official/webapp-testing.md +0 -96
- package/src/skills/official-skills.ts +0 -89
- package/src/skills/registry.ts +0 -355
- package/src/skills/semantic-matcher.ts +0 -234
- package/src/skills/tools/pipeline-suggest.ts +0 -226
- package/src/skills/tools/skill-invoke.ts +0 -168
- package/src/skills/tools/skill-list.ts +0 -59
- package/src/skills/upgrade-engine.ts +0 -541
- package/src/skills/upgrade-prompt.ts +0 -84
- package/src/templates/go.yaml +0 -53
- package/src/templates/python.yaml +0 -59
- package/src/templates/react.yaml +0 -55
- package/src/templates/template-manager.ts +0 -170
- package/src/web/analytics/anti-pattern-detector.ts +0 -367
- package/src/web/analytics/drift-detector.ts +0 -219
- package/src/web/analytics/weekly-report.ts +0 -431
- package/src/web/auth-middleware.ts +0 -54
- package/src/web/routes/_helpers.ts +0 -34
- package/src/web/routes/ai.ts +0 -204
- package/src/web/routes/auth.ts +0 -22
- package/src/web/routes/drift.ts +0 -25
- package/src/web/routes/error-handler.ts +0 -120
- package/src/web/routes/events.ts +0 -47
- package/src/web/routes/insights.ts +0 -43
- package/src/web/routes/patch.ts +0 -117
- package/src/web/routes/reports.ts +0 -34
- package/src/web/routes/rules.ts +0 -76
- package/src/web/routes/sessions.ts +0 -250
- package/src/web/routes/skill-stats.ts +0 -92
- package/src/web/routes/skills.ts +0 -350
- package/src/web/routes/static.ts +0 -67
- package/src/web/routes/stats.ts +0 -50
- package/src/web/routes/status.ts +0 -30
- package/src/web/routes/tasks.ts +0 -193
- package/src/web/routes/token-usage.ts +0 -20
- package/src/web/routes/trace.ts +0 -126
- package/src/web/routes/types.ts +0 -57
- package/src/web/server.ts +0 -134
- package/src/web/ssrf-guard.ts +0 -112
- package/src/web/static/index.html +0 -3251
- package/src/web/static/vendor/chart.umd.min.js +0 -20
- package/tests/e2e/dashboard.spec.ts +0 -205
- package/tests/e2e/routing-skill-e2e.test.ts +0 -39
- package/tests/helpers/mock-ai.ts +0 -92
- package/tests/helpers/mock-storage.ts +0 -159
- package/tests/integration/claudemd-generator.test.ts +0 -90
- package/tests/integration/queue-replay.integration.test.ts +0 -193
- package/tests/integration/tasks-filter.integration.test.ts +0 -154
- package/tests/integration/web-analytics.integration.test.ts +0 -133
- package/tests/integration/web-stats.integration.test.ts +0 -135
- package/tests/integration/web-trace.integration.test.ts +0 -175
- package/tests/performance/database.benchmark.ts +0 -161
- package/tests/semantic-matcher.test.ts +0 -99
- package/tests/skill-matcher.test.ts +0 -110
- package/tests/unit/ai-provider-retry.test.ts +0 -194
- package/tests/unit/ai-provider-vision.test.ts +0 -224
- package/tests/unit/claudemd-generator.test.ts +0 -68
- package/tests/unit/cli-mcp.test.ts +0 -141
- package/tests/unit/core/forge-paths.test.ts +0 -99
- package/tests/unit/daemon/hook-sync.test.ts +0 -71
- package/tests/unit/daemon/post-tool-use.test.ts +0 -121
- package/tests/unit/daemon/skill-sync.test.ts +0 -75
- package/tests/unit/daemon/stop-handler-behavior-summary.test.ts +0 -202
- package/tests/unit/daemon/task-segmenter-recover.test.ts +0 -84
- package/tests/unit/event-fields.test.ts +0 -88
- package/tests/unit/event-parser.test.ts +0 -55
- package/tests/unit/handlers.test.ts +0 -171
- package/tests/unit/hooks/resolve-project-path.test.ts +0 -122
- package/tests/unit/invocation-guard.test.ts +0 -125
- package/tests/unit/queue.test.ts +0 -272
- package/tests/unit/router.test.ts +0 -138
- package/tests/unit/security.test.ts +0 -128
- package/tests/unit/skill-invocations-workflow.test.ts +0 -495
- package/tests/unit/skill-registry.test.ts +0 -94
- package/tests/unit/skills/invocation-guard-ttl.test.ts +0 -211
- package/tests/unit/skills/official-skills-loader.test.ts +0 -126
- package/tests/unit/skills/registry-multiformat.test.ts +0 -92
- package/tests/unit/skills/upgrade-engine-parse.test.ts +0 -138
- package/tests/unit/skills/upgrade-engine.test.ts +0 -401
- package/tests/unit/skills/upgrade-prompt.test.ts +0 -89
- package/tests/unit/socket-server.test.ts +0 -183
- package/tests/unit/storage/event-operations-aggregates.test.ts +0 -342
- package/tests/unit/storage/migration-idempotent.test.ts +0 -304
- package/tests/unit/storage/routing-aggregates.test.ts +0 -276
- package/tests/unit/storage/routing.test.ts +0 -117
- package/tests/unit/storage/schema-missing.test.ts +0 -81
- package/tests/unit/storage/session-operations-aggregates.test.ts +0 -120
- package/tests/unit/storage/sessions-aggregate.test.ts +0 -435
- package/tests/unit/storage/skill-operations-counts.test.ts +0 -106
- package/tests/unit/storage/skills-aggregates.test.ts +0 -104
- package/tests/unit/storage/sqlite-refactor-harness.test.ts +0 -314
- package/tests/unit/storage/task-operations-counts.test.ts +0 -46
- package/tests/unit/storage/tasks-getById.test.ts +0 -343
- package/tests/unit/storage/tasks-stale-gc.test.ts +0 -86
- package/tests/unit/storage.test.ts +0 -172
- package/tests/unit/token-usage.test.ts +0 -144
- package/tests/unit/type-guards.test.ts +0 -201
- package/tests/unit/utils/format.test.ts +0 -189
- package/tests/unit/utils/session.test.ts +0 -89
- package/tests/unit/utils/time.test.ts +0 -112
- package/tests/unit/web/navigation-back-contract.test.ts +0 -134
- package/tests/unit/web/routes-auth.test.ts +0 -93
- package/tests/unit/web/routes-events.test.ts +0 -101
- package/tests/unit/web/routes-rules.test.ts +0 -182
- package/tests/unit/web/routes-sessions.test.ts +0 -181
- package/tests/unit/web/routes-skill-stats.test.ts +0 -179
- package/tests/unit/web/routes-stats.test.ts +0 -92
- package/tests/unit/web/routes-tasks.test.ts +0 -385
- package/tests/unit/web/task-title-contract.test.ts +0 -210
- package/tests/unit/web/tasks-component-contract.test.ts +0 -179
- package/tsconfig.json +0 -22
- package/vitest.config.ts +0 -21
- package/vitest.integration.config.ts +0 -16
- package/web/CLAUDE.md +0 -20
- package/web/index.html +0 -13
- package/web/package-lock.json +0 -4854
- package/web/package.json +0 -35
- package/web/postcss.config.js +0 -6
- package/web/src/App.tsx +0 -110
- package/web/src/components/CodeBlock.tsx +0 -31
- package/web/src/components/Confirm.tsx +0 -96
- package/web/src/components/Drawer.tsx +0 -60
- package/web/src/components/Layout.tsx +0 -145
- package/web/src/components/MarkdownRenderer.tsx +0 -77
- package/web/src/components/SearchInput.tsx +0 -31
- package/web/src/components/SessionDetailContent.tsx +0 -157
- package/web/src/components/Toast.tsx +0 -92
- package/web/src/index.css +0 -19
- package/web/src/main.tsx +0 -31
- package/web/src/pages/AIConfig.tsx +0 -233
- package/web/src/pages/Dashboard.tsx +0 -572
- package/web/src/pages/Events.tsx +0 -271
- package/web/src/pages/Reports.tsx +0 -428
- package/web/src/pages/SessionDetail.tsx +0 -162
- package/web/src/pages/Sessions.tsx +0 -205
- package/web/src/pages/Skills.tsx +0 -180
- package/web/src/pages/TaskDetail.tsx +0 -515
- package/web/src/pages/Tasks.tsx +0 -415
- package/web/src/utils/auth.ts +0 -59
- package/web/src/utils/export.ts +0 -54
- package/web/src/utils/navigation.ts +0 -25
- package/web/src/utils/task-title.ts +0 -49
- package/web/src/utils/time.ts +0 -13
- package/web/tailwind.config.js +0 -11
- package/web/tsconfig.json +0 -21
- package/web/tsconfig.node.json +0 -10
- package/web/vite.config.ts +0 -76
- package/winspan-claude-forge-8.43.0.tgz +0 -0
package/scripts/e2e-mcp-link.ts
DELETED
|
@@ -1,423 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env tsx
|
|
2
|
-
/**
|
|
3
|
-
* E2E Verification: MCP call chain → skill_invoke → DB persistence
|
|
4
|
-
*
|
|
5
|
-
* Black-box verification of the full pipeline:
|
|
6
|
-
*
|
|
7
|
-
* MCP Client (this script)
|
|
8
|
-
* → HTTP POST /mcp (Bearer auth)
|
|
9
|
-
* → StreamableHTTPServerTransport
|
|
10
|
-
* → McpServer.callTool('skill_invoke' / 'skill_list')
|
|
11
|
-
* → skillInvoke() / skillList()
|
|
12
|
-
* → SQLiteStorage.writeSkillInvocation()
|
|
13
|
-
*
|
|
14
|
-
* Steps:
|
|
15
|
-
* 1. Start a standalone WebServer (independent of system daemon)
|
|
16
|
-
* 2. Use MCP SDK Client to connect via HTTP
|
|
17
|
-
* 3. Call skill_invoke / skill_list
|
|
18
|
-
* 4. Query SQLite directly to verify skill_invocations table
|
|
19
|
-
* 5. Print verification report
|
|
20
|
-
*
|
|
21
|
-
* Isolation: temp dir for DB, swaps in test daemon.token (restored on exit).
|
|
22
|
-
*/
|
|
23
|
-
|
|
24
|
-
import { mkdtempSync, rmSync, writeFileSync, readFileSync, existsSync, mkdirSync, unlinkSync } from 'node:fs';
|
|
25
|
-
import { tmpdir } from 'node:os';
|
|
26
|
-
import { join } from 'node:path';
|
|
27
|
-
import type { AddressInfo } from 'node:net';
|
|
28
|
-
|
|
29
|
-
import { SQLiteStorage } from '../src/core/storage/sqlite.js';
|
|
30
|
-
import { RuleEngine } from '../src/engine/rule-engine.js';
|
|
31
|
-
import { SkillRegistry } from '../src/skills/registry.js';
|
|
32
|
-
import { InvocationGuard } from '../src/skills/invocation-guard.js';
|
|
33
|
-
import { WebServer } from '../src/web/server.js';
|
|
34
|
-
import { FORGE_PATHS } from '../src/core/constants.js';
|
|
35
|
-
import { routingState } from '../src/daemon/routing-state.js';
|
|
36
|
-
|
|
37
|
-
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
|
38
|
-
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
|
|
39
|
-
|
|
40
|
-
// ─── Test infra ────────────────────────────────────────────────────────────
|
|
41
|
-
|
|
42
|
-
const TEST_TOKEN = `e2e-mcp-token-${Date.now()}`;
|
|
43
|
-
const TOKEN_FILE = join(FORGE_PATHS.home(), 'daemon.token');
|
|
44
|
-
|
|
45
|
-
const tmp = mkdtempSync(join(tmpdir(), 'forge-e2e-mcp-'));
|
|
46
|
-
const dbPath = join(tmp, 'data.db');
|
|
47
|
-
|
|
48
|
-
interface TestResult {
|
|
49
|
-
step: string;
|
|
50
|
-
pass: boolean;
|
|
51
|
-
details: string;
|
|
52
|
-
}
|
|
53
|
-
const results: TestResult[] = [];
|
|
54
|
-
function record(step: string, pass: boolean, details: string): void {
|
|
55
|
-
results.push({ step, pass, details });
|
|
56
|
-
const mark = pass ? 'PASS' : 'FAIL';
|
|
57
|
-
console.log(`[${mark}] ${step}: ${details}`);
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
// Backup user's daemon.token, install test token (mirrors mcp-server.test.ts pattern).
|
|
61
|
-
let backupToken: string | null = null;
|
|
62
|
-
function installTestToken(): void {
|
|
63
|
-
if (existsSync(TOKEN_FILE)) {
|
|
64
|
-
backupToken = readFileSync(TOKEN_FILE, 'utf-8');
|
|
65
|
-
} else {
|
|
66
|
-
backupToken = null;
|
|
67
|
-
mkdirSync(FORGE_PATHS.home(), { recursive: true });
|
|
68
|
-
}
|
|
69
|
-
writeFileSync(TOKEN_FILE, TEST_TOKEN, { mode: 0o600 });
|
|
70
|
-
}
|
|
71
|
-
function restoreTestToken(): void {
|
|
72
|
-
if (backupToken !== null) {
|
|
73
|
-
writeFileSync(TOKEN_FILE, backupToken, { mode: 0o600 });
|
|
74
|
-
} else if (existsSync(TOKEN_FILE)) {
|
|
75
|
-
unlinkSync(TOKEN_FILE);
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
function authHeaders(extra: Record<string, string> = {}): Record<string, string> {
|
|
80
|
-
return { Authorization: `Bearer ${TEST_TOKEN}`, ...extra };
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
async function newClient(port: number, headers: Record<string, string>): Promise<Client> {
|
|
84
|
-
const transport = new StreamableHTTPClientTransport(new URL(`http://127.0.0.1:${port}/mcp`), {
|
|
85
|
-
requestInit: { headers },
|
|
86
|
-
});
|
|
87
|
-
const client = new Client({ name: 'forge-e2e-client', version: '1.0.0' });
|
|
88
|
-
await client.connect(transport);
|
|
89
|
-
return client;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// ─── Main ──────────────────────────────────────────────────────────────────
|
|
93
|
-
|
|
94
|
-
async function main(): Promise<void> {
|
|
95
|
-
console.log('=== E2E MCP Link Verification ===\n');
|
|
96
|
-
console.log(`tmp dir: ${tmp}`);
|
|
97
|
-
console.log(`db: ${dbPath}`);
|
|
98
|
-
console.log(`token: ${TEST_TOKEN}\n`);
|
|
99
|
-
|
|
100
|
-
installTestToken();
|
|
101
|
-
|
|
102
|
-
const storage = new SQLiteStorage(dbPath);
|
|
103
|
-
const ruleEngine = new RuleEngine();
|
|
104
|
-
const skillRegistry = new SkillRegistry();
|
|
105
|
-
const guard = new InvocationGuard();
|
|
106
|
-
|
|
107
|
-
const allSkills = skillRegistry.getAll();
|
|
108
|
-
console.log(`Registry loaded ${allSkills.length} skills`);
|
|
109
|
-
|
|
110
|
-
// ── Start WebServer ──────────────────────────────────────────────────────
|
|
111
|
-
const server = new WebServer({
|
|
112
|
-
port: 0,
|
|
113
|
-
storage,
|
|
114
|
-
ruleEngine,
|
|
115
|
-
skillRegistry,
|
|
116
|
-
invocationGuard: guard,
|
|
117
|
-
});
|
|
118
|
-
|
|
119
|
-
// Pull the express app out and listen on a random port (matches mcp-server.test.ts).
|
|
120
|
-
const app = (server as unknown as { app: import('express').Application }).app;
|
|
121
|
-
const port: number = await new Promise<number>((resolve) => {
|
|
122
|
-
const handle = app.listen(0, () => {
|
|
123
|
-
const addr = handle.address() as AddressInfo;
|
|
124
|
-
// Also give server.server back so afterEach-style cleanup keeps a handle.
|
|
125
|
-
(server as unknown as { server: ReturnType<typeof app.listen> | null }).server = handle;
|
|
126
|
-
resolve(addr.port);
|
|
127
|
-
});
|
|
128
|
-
});
|
|
129
|
-
console.log(`Server listening on http://127.0.0.1:${port}/mcp\n`);
|
|
130
|
-
|
|
131
|
-
const activeClients: Client[] = [];
|
|
132
|
-
|
|
133
|
-
let exitCode = 0;
|
|
134
|
-
try {
|
|
135
|
-
// ── Test 1: tools/list ────────────────────────────────────────────────
|
|
136
|
-
{
|
|
137
|
-
const client = await newClient(port, authHeaders());
|
|
138
|
-
activeClients.push(client);
|
|
139
|
-
const r = await client.listTools();
|
|
140
|
-
const names = r.tools.map((t) => t.name);
|
|
141
|
-
record(
|
|
142
|
-
'list tools',
|
|
143
|
-
names.includes('skill_invoke') && names.includes('skill_list'),
|
|
144
|
-
`tools: ${names.join(', ')}`,
|
|
145
|
-
);
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
// ── Test 2: skill_list returns skills ─────────────────────────────────
|
|
149
|
-
{
|
|
150
|
-
const client = await newClient(port, authHeaders());
|
|
151
|
-
activeClients.push(client);
|
|
152
|
-
const r = await client.callTool({ name: 'skill_list', arguments: {} });
|
|
153
|
-
const content = r.content as Array<{ type: string; text: string }>;
|
|
154
|
-
const parsed = JSON.parse(content[0].text) as { skills: unknown[]; total: number };
|
|
155
|
-
record(
|
|
156
|
-
'skill_list returns skills',
|
|
157
|
-
parsed.total > 0 && parsed.skills.length === parsed.total,
|
|
158
|
-
`${parsed.total} skills returned`,
|
|
159
|
-
);
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
// ── Test 3: skill_invoke base call (with explicit X-Forge-* headers) ──
|
|
163
|
-
const baseSession = 'e2e-session-1';
|
|
164
|
-
const baseRouteId = 'e2e-route-1';
|
|
165
|
-
const baseAgent = 'e2e-agent-bmad';
|
|
166
|
-
let invokeContentLen = 0;
|
|
167
|
-
{
|
|
168
|
-
const client = await newClient(
|
|
169
|
-
port,
|
|
170
|
-
authHeaders({
|
|
171
|
-
'X-Forge-Session-Id': baseSession,
|
|
172
|
-
'X-Forge-Route-Request-Id': baseRouteId,
|
|
173
|
-
'X-Forge-Agent-Id': baseAgent,
|
|
174
|
-
}),
|
|
175
|
-
);
|
|
176
|
-
activeClients.push(client);
|
|
177
|
-
const r = await client.callTool({
|
|
178
|
-
name: 'skill_invoke',
|
|
179
|
-
arguments: { skill_id: 'official-tdd', reason: 'bmad/implement:e2e-test' },
|
|
180
|
-
});
|
|
181
|
-
const content = r.content as Array<{ type: string; text: string }>;
|
|
182
|
-
invokeContentLen = content[0]?.text?.length ?? 0;
|
|
183
|
-
record(
|
|
184
|
-
'skill_invoke base',
|
|
185
|
-
!r.isError && invokeContentLen > 100 && content[0].text.startsWith('# '),
|
|
186
|
-
`content length ${invokeContentLen}`,
|
|
187
|
-
);
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
// ── Test 4: DB row inserted with all linkage fields ───────────────────
|
|
191
|
-
{
|
|
192
|
-
const rows = storage.querySkillInvocations({ session_id: baseSession });
|
|
193
|
-
const row = rows[0];
|
|
194
|
-
record('DB row inserted', rows.length === 1, `rows count = ${rows.length}`);
|
|
195
|
-
record(
|
|
196
|
-
'workflow parsed',
|
|
197
|
-
row?.workflow === 'bmad' && row?.phase === 'implement',
|
|
198
|
-
`workflow=${row?.workflow}, phase=${row?.phase}`,
|
|
199
|
-
);
|
|
200
|
-
record(
|
|
201
|
-
'feature_slug parsed',
|
|
202
|
-
row?.feature_slug === 'e2e-test',
|
|
203
|
-
`feature_slug=${row?.feature_slug}`,
|
|
204
|
-
);
|
|
205
|
-
record(
|
|
206
|
-
'session/route/agent linkage',
|
|
207
|
-
row?.session_id === baseSession &&
|
|
208
|
-
row?.route_request_id === baseRouteId &&
|
|
209
|
-
row?.agent_id === baseAgent,
|
|
210
|
-
`session=${row?.session_id}, route=${row?.route_request_id}, agent=${row?.agent_id}`,
|
|
211
|
-
);
|
|
212
|
-
record(
|
|
213
|
-
'invocation_type=dynamic + success=1',
|
|
214
|
-
row?.invocation_type === 'dynamic' && row?.success === 1,
|
|
215
|
-
`invocation_type=${row?.invocation_type}, success=${row?.success}`,
|
|
216
|
-
);
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
// ── Test 5: Multi-phase aggregation via queryWorkflowProgress ─────────
|
|
220
|
-
// Use a fresh session to avoid the per-session idempotency guard, and
|
|
221
|
-
// a single shared route_request_id so all phases group together.
|
|
222
|
-
const multiSession = 'e2e-session-multi';
|
|
223
|
-
const multiRouteId = 'e2e-route-multi';
|
|
224
|
-
const multiAgent = 'e2e-agent-multi';
|
|
225
|
-
{
|
|
226
|
-
const phaseSkills: Array<[string, string]> = [
|
|
227
|
-
['analyze', 'official-spec-driven-design'],
|
|
228
|
-
['design', 'official-architecture-decision'],
|
|
229
|
-
['implement', 'official-tdd'],
|
|
230
|
-
];
|
|
231
|
-
for (const [phase, skill] of phaseSkills) {
|
|
232
|
-
const c = await newClient(
|
|
233
|
-
port,
|
|
234
|
-
authHeaders({
|
|
235
|
-
'X-Forge-Session-Id': multiSession,
|
|
236
|
-
'X-Forge-Route-Request-Id': multiRouteId,
|
|
237
|
-
'X-Forge-Agent-Id': multiAgent,
|
|
238
|
-
}),
|
|
239
|
-
);
|
|
240
|
-
activeClients.push(c);
|
|
241
|
-
await c.callTool({
|
|
242
|
-
name: 'skill_invoke',
|
|
243
|
-
arguments: { skill_id: skill, reason: `bmad/${phase}:multi-phase-test` },
|
|
244
|
-
});
|
|
245
|
-
}
|
|
246
|
-
const progress = storage.queryWorkflowProgress({ session_id: multiSession });
|
|
247
|
-
const ok =
|
|
248
|
-
progress.length === 1 &&
|
|
249
|
-
progress[0].workflow === 'bmad' &&
|
|
250
|
-
progress[0].route_request_id === multiRouteId &&
|
|
251
|
-
progress[0].phases_completed.length === 3 &&
|
|
252
|
-
progress[0].phases_completed.includes('analyze') &&
|
|
253
|
-
progress[0].phases_completed.includes('design') &&
|
|
254
|
-
progress[0].phases_completed.includes('implement') &&
|
|
255
|
-
progress[0].feature_slug === 'multi-phase-test';
|
|
256
|
-
record(
|
|
257
|
-
'multi-phase aggregation',
|
|
258
|
-
ok,
|
|
259
|
-
`routes=${progress.length}, phases=[${progress[0]?.phases_completed.join(', ')}], feature=${progress[0]?.feature_slug}`,
|
|
260
|
-
);
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
// ── Test 6: Error path — non-existent skill records success=0 ────────
|
|
264
|
-
const errSession = 'e2e-session-error';
|
|
265
|
-
{
|
|
266
|
-
const client = await newClient(
|
|
267
|
-
port,
|
|
268
|
-
authHeaders({ 'X-Forge-Session-Id': errSession }),
|
|
269
|
-
);
|
|
270
|
-
activeClients.push(client);
|
|
271
|
-
const r = await client.callTool({
|
|
272
|
-
name: 'skill_invoke',
|
|
273
|
-
arguments: { skill_id: 'nonexistent-skill-xyz', reason: 'bmad/analyze' },
|
|
274
|
-
});
|
|
275
|
-
const isErrorFlag = r.isError === true;
|
|
276
|
-
const rows = storage.querySkillInvocations({ session_id: errSession });
|
|
277
|
-
const row = rows[0];
|
|
278
|
-
record(
|
|
279
|
-
'error skill returns isError + records success=0',
|
|
280
|
-
isErrorFlag &&
|
|
281
|
-
rows.length === 1 &&
|
|
282
|
-
row.success === 0 &&
|
|
283
|
-
(row.error?.includes('Skill not found') ?? false),
|
|
284
|
-
`isError=${isErrorFlag}, rows=${rows.length}, success=${row?.success}, error="${row?.error?.slice(0, 60) ?? ''}"`,
|
|
285
|
-
);
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
// ── Test 7: Guard blocks repeated calls (idempotency) ─────────────────
|
|
289
|
-
const dupSession = 'e2e-session-dup';
|
|
290
|
-
{
|
|
291
|
-
// First invocation succeeds
|
|
292
|
-
const c1 = await newClient(
|
|
293
|
-
port,
|
|
294
|
-
authHeaders({ 'X-Forge-Session-Id': dupSession }),
|
|
295
|
-
);
|
|
296
|
-
activeClients.push(c1);
|
|
297
|
-
await c1.callTool({
|
|
298
|
-
name: 'skill_invoke',
|
|
299
|
-
arguments: { skill_id: 'official-tdd', reason: 'first call' },
|
|
300
|
-
});
|
|
301
|
-
|
|
302
|
-
// Second invocation of same skill in same session should be blocked
|
|
303
|
-
const c2 = await newClient(
|
|
304
|
-
port,
|
|
305
|
-
authHeaders({ 'X-Forge-Session-Id': dupSession }),
|
|
306
|
-
);
|
|
307
|
-
activeClients.push(c2);
|
|
308
|
-
const r2 = await c2.callTool({
|
|
309
|
-
name: 'skill_invoke',
|
|
310
|
-
arguments: { skill_id: 'official-tdd', reason: 'duplicate' },
|
|
311
|
-
});
|
|
312
|
-
|
|
313
|
-
const rows = storage.querySkillInvocations({ session_id: dupSession });
|
|
314
|
-
// Rows ordered by timestamp DESC: rows[0] is the duplicate (Blocked), rows[1] is the first
|
|
315
|
-
const blockedRow = rows.find((r) => r.success === 0 && r.error?.includes('Blocked'));
|
|
316
|
-
const successRow = rows.find((r) => r.success === 1);
|
|
317
|
-
record(
|
|
318
|
-
'guard blocks repeated calls',
|
|
319
|
-
r2.isError === true &&
|
|
320
|
-
rows.length === 2 &&
|
|
321
|
-
blockedRow !== undefined &&
|
|
322
|
-
successRow !== undefined,
|
|
323
|
-
`2 rows; isError=${r2.isError}; success=${successRow?.success}, blocked.error="${blockedRow?.error?.slice(0, 60) ?? ''}"`,
|
|
324
|
-
);
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
// ── Test 8: Fallback to routingState when X-Forge-Session-Id missing ──
|
|
328
|
-
const fallbackSession = 'e2e-session-fallback';
|
|
329
|
-
const fallbackRouteId = 'e2e-route-fallback';
|
|
330
|
-
{
|
|
331
|
-
routingState.setRouting(fallbackSession, {
|
|
332
|
-
agentName: 'fallback-agent',
|
|
333
|
-
agentDescription: 'desc',
|
|
334
|
-
timestamp: Date.now(),
|
|
335
|
-
routeRequestId: fallbackRouteId,
|
|
336
|
-
});
|
|
337
|
-
// No X-Forge-* headers — server should fall back to routingState.getMostRecent().
|
|
338
|
-
const client = await newClient(port, authHeaders());
|
|
339
|
-
activeClients.push(client);
|
|
340
|
-
await client.callTool({
|
|
341
|
-
name: 'skill_invoke',
|
|
342
|
-
arguments: { skill_id: 'official-refactor', reason: 'fallback test' },
|
|
343
|
-
});
|
|
344
|
-
|
|
345
|
-
const rows = storage.querySkillInvocations({ session_id: fallbackSession });
|
|
346
|
-
const row = rows[0];
|
|
347
|
-
record(
|
|
348
|
-
'routingState fallback (no headers)',
|
|
349
|
-
rows.length === 1 &&
|
|
350
|
-
row?.route_request_id === fallbackRouteId &&
|
|
351
|
-
row?.agent_id === 'fallback-agent',
|
|
352
|
-
`rows=${rows.length}, route=${row?.route_request_id}, agent=${row?.agent_id}`,
|
|
353
|
-
);
|
|
354
|
-
routingState.clearRouting(fallbackSession);
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
// ── Test 9: mcp-anon-* fallback when neither header nor routingState ──
|
|
358
|
-
{
|
|
359
|
-
// routingState should be empty by now (we cleared the fallback above and
|
|
360
|
-
// no daemon is feeding it). Issue a call and look for an mcp-anon-* row.
|
|
361
|
-
const before = Date.now();
|
|
362
|
-
const client = await newClient(port, authHeaders());
|
|
363
|
-
activeClients.push(client);
|
|
364
|
-
await client.callTool({
|
|
365
|
-
name: 'skill_invoke',
|
|
366
|
-
arguments: { skill_id: 'official-debug', reason: 'anon test' },
|
|
367
|
-
});
|
|
368
|
-
|
|
369
|
-
// Fetch all rows and filter for mcp-anon-* with timestamp >= before.
|
|
370
|
-
// querySkillInvocations is keyed by session_id so we use a wide query.
|
|
371
|
-
const allRows = storage.querySkillInvocations({});
|
|
372
|
-
const anonRow = allRows.find(
|
|
373
|
-
(r) => r.session_id.startsWith('mcp-anon-') && r.timestamp >= before,
|
|
374
|
-
);
|
|
375
|
-
record(
|
|
376
|
-
'mcp-anon-* fallback (no header, no routingState)',
|
|
377
|
-
anonRow !== undefined && anonRow.success === 1,
|
|
378
|
-
`session_id=${anonRow?.session_id ?? '<none>'}`,
|
|
379
|
-
);
|
|
380
|
-
}
|
|
381
|
-
} catch (err) {
|
|
382
|
-
console.error('\n[FATAL] Unhandled error during tests:', err);
|
|
383
|
-
record('e2e-script', false, err instanceof Error ? err.message : String(err));
|
|
384
|
-
exitCode = 1;
|
|
385
|
-
} finally {
|
|
386
|
-
// Cleanup: clients first, then HTTP server, then storage, then tmp dir.
|
|
387
|
-
for (const c of activeClients) {
|
|
388
|
-
try {
|
|
389
|
-
await c.close();
|
|
390
|
-
} catch {
|
|
391
|
-
/* ignore */
|
|
392
|
-
}
|
|
393
|
-
}
|
|
394
|
-
const httpServer = (server as unknown as { server: import('node:net').Server | null }).server;
|
|
395
|
-
if (httpServer) {
|
|
396
|
-
(httpServer as unknown as { closeAllConnections?: () => void }).closeAllConnections?.();
|
|
397
|
-
await new Promise<void>((res) => httpServer.close(() => res()));
|
|
398
|
-
}
|
|
399
|
-
storage.close();
|
|
400
|
-
rmSync(tmp, { recursive: true, force: true });
|
|
401
|
-
restoreTestToken();
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
// ── Final report ─────────────────────────────────────────────────────────
|
|
405
|
-
const passed = results.filter((r) => r.pass).length;
|
|
406
|
-
const total = results.length;
|
|
407
|
-
console.log(`\n=== Result: ${passed}/${total} passed ===`);
|
|
408
|
-
if (passed !== total) {
|
|
409
|
-
console.log('\nFailures:');
|
|
410
|
-
for (const r of results.filter((x) => !x.pass)) {
|
|
411
|
-
console.log(` - ${r.step}: ${r.details}`);
|
|
412
|
-
}
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
process.exit(passed === total && exitCode === 0 ? 0 : 1);
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
// Last-resort handler for a rejection that escapes main(): log it, call
// restoreTestToken(), remove the temporary directory, and exit with status 1.
main().catch(function onFatal(reason: unknown) {
  console.error('E2E failed:', reason);
  restoreTestToken();
  rmSync(tmp, { recursive: true, force: true });
  process.exit(1);
});
|
|
@@ -1,253 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env tsx
|
|
2
|
-
/**
|
|
3
|
-
* E2E 验证:6 个方法论 Agent 的 prompt 质量
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { OFFICIAL_AGENTS, findOfficialAgent } from '../src/agents/index.js';
|
|
7
|
-
import { METHODOLOGY_PRESETS } from '../src/agents/methodologies/presets.js';
|
|
8
|
-
import { getPhase, getAllPhases, buildMethodologyAgent } from '../src/agents/methodologies/agent-builder.js';
|
|
9
|
-
import { OFFICIAL_SKILL_KEYWORDS } from '../src/skills/official-skills.js';
|
|
10
|
-
import { parseWorkflowMeta } from '../src/skills/tools/skill-invoke.js';
|
|
11
|
-
|
|
12
|
-
/** One finding produced by a check. */
interface Issue {
  /** Label the finding is attached to (a preset id or a `phase:<workflow>/<id>` tag). */
  agent: string;
  /** How serious the finding is. */
  severity: 'error' | 'warning' | 'info';
  /** Human-readable description of the finding. */
  message: string;
}

/** Every finding collected so far, in the order it was recorded. */
const issues: Issue[] = [];

/**
 * Appends a finding to the shared `issues` list.
 *
 * @param agent    Label the finding is attached to.
 * @param severity Severity of the finding.
 * @param message  Description of the finding.
 */
function record(agent: string, severity: Issue['severity'], message: string) {
  const finding: Issue = { agent, severity, message };
  issues.push(finding);
}
|
|
23
|
-
|
|
24
|
-
// Ids of the official methodology presets that the per-preset checks iterate over.
const PRESET_IDS = [
  'bmad-feature-full',
  'bmad-feature-quick',
  'bmad-spec-only',
  'harness-debug-full',
  'harness-hotfix',
  'harness-investigate',
];

// Skill ids taken from the keys of OFFICIAL_SKILL_KEYWORDS.
const VALID_SKILLS = new Set(Object.keys(OFFICIAL_SKILL_KEYWORDS));

console.log('=== 方法论 Agent Prompt 质量 E2E 验证 ===\n');

// ── Check 1: all six presets are registered ──────────────
console.log('-- Check 1: Preset 注册检查 --');
PRESET_IDS.forEach((presetId) => {
  const registered = findOfficialAgent(presetId);
  if (registered) {
    const skillCount = registered.skills?.length ?? 0;
    console.log(`✓ ${presetId}: 已注册 (version=${registered.version}, skills=${skillCount})`);
    return;
  }
  // A preset id with no matching official agent is a hard failure.
  record(presetId, 'error', '未注册到 OFFICIAL_AGENTS');
});
|
|
47
|
-
|
|
48
|
-
// ── Check 2: every phase id listed by a preset resolves to a phase ──
console.log('\n-- Check 2: Phase 列表合法性 --');
METHODOLOGY_PRESETS.forEach((preset) => {
  // Keep only the phase ids that getPhase cannot resolve and report each one.
  preset.phases
    .filter((phaseId) => !getPhase(phaseId))
    .forEach((phaseId) => {
      record(preset.id, 'error', `phase ${phaseId} 不存在`);
    });
});
console.log(`(已检查 ${METHODOLOGY_PRESETS.length} 个 preset 的 phase 列表)`);
|
|
59
|
-
|
|
60
|
-
// ── Check 3: prompt content quality ──────────────────────
console.log('\n-- Check 3: prompt 内容质量 --');
PRESET_IDS.forEach((id) => {
  const agent = findOfficialAgent(id);
  if (!agent) return; // unregistered presets were already reported by Check 1

  const { content } = agent;

  // Required wording: the prompt must mention the "soft suggestion" semantics.
  const mentionsSoftAdvice = ['软性建议', '软性'].some((marker) => content.includes(marker));
  if (!mentionsSoftAdvice) {
    record(id, 'warning', 'prompt 未明确"软性建议"语义');
  }

  // A very short prompt probably means content is missing.
  if (content.length < 500) {
    record(id, 'warning', `prompt 过短(${content.length} 字符),可能内容缺失`);
  }

  // Every phase of the preset should be announced in the prompt.
  const phaseIds = METHODOLOGY_PRESETS.find((p) => p.id === id)?.phases ?? [];
  for (const phaseId of phaseIds) {
    const phase = getPhase(phaseId);
    if (!phase) continue;
    const marker = `Phase: ${phase.id}`;
    if (!content.includes(marker)) {
      record(id, 'error', `prompt 缺少 "${marker}" 标识`);
    }
  }

  console.log(`  ${id}: ${content.length} 字符`);
});
|
|
87
|
-
|
|
88
|
-
// ── Check 4: reason strings follow the workflow convention ──
// Prompts write `reason` values with <feature-slug> / <bug-slug> as template
// placeholders. At runtime the LLM replaces them with a real slug before
// calling skill_invoke, so validation substitutes a sample slug first and
// then feeds the result to parseWorkflowMeta.
console.log('\n-- Check 4: reason 格式 --');
const TEMPLATE_PLACEHOLDERS = /<(feature-slug|bug-slug)>/g;
const PLACEHOLDER_SUBSTITUTE = 'example-slug';

for (const id of PRESET_IDS) {
  const agent = findOfficialAgent(id);
  if (!agent) continue;

  // Workflow implied by the preset id prefix; null when the id has neither prefix.
  // Computed once per preset because it does not depend on the individual reason.
  const presetWorkflow = id.startsWith('bmad-') ? 'bmad' : id.startsWith('harness-') ? 'harness' : null;

  // Collect every `reason: "..."` literal that appears in the prompt.
  const reasonMatches = [...agent.content.matchAll(/reason:\s*"([^"]+)"/g)];
  for (const match of reasonMatches) {
    const reasonTemplate = match[1];
    const reasonResolved = reasonTemplate.replace(TEMPLATE_PLACEHOLDERS, PLACEHOLDER_SUBSTITUTE);

    // Sub-check 4a: the raw template should carry a slug placeholder so that
    // slug metadata is present at runtime.
    const hasSlugPlaceholder = reasonTemplate.includes('<feature-slug>') || reasonTemplate.includes('<bug-slug>');
    if (!hasSlugPlaceholder) {
      record(id, 'info', `reason "${reasonTemplate}" 缺少 <feature-slug>/<bug-slug> 占位符(仍可工作,但 slug 元数据会缺失)`);
    }

    // Sub-check 4b: after substitution the reason must be parseable.
    const meta = parseWorkflowMeta(reasonResolved);
    if (!meta.workflow) {
      record(id, 'error', `reason "${reasonTemplate}" 替换占位符后 ("${reasonResolved}") 仍无法被 parseWorkflowMeta 解析`);
    } else if (meta.workflow !== 'bmad' && meta.workflow !== 'harness') {
      record(id, 'error', `reason "${reasonTemplate}" workflow=${meta.workflow}(应为 bmad/harness)`);
    } else if (presetWorkflow && meta.workflow !== presetWorkflow) {
      // Sub-check 4c: the parsed workflow should match the preset's own workflow.
      record(id, 'warning', `reason "${reasonTemplate}" workflow=${meta.workflow}(preset 主线=${presetWorkflow})`);
    }
  }

  // No reason literal at all is only suspicious when some phase needs a skill.
  if (reasonMatches.length === 0) {
    const preset = METHODOLOGY_PRESETS.find((p) => p.id === id);
    const needsSkill = preset?.phases.some((p) => getPhase(p)?.skillId);
    if (needsSkill) {
      record(id, 'warning', '至少有 phase 需要调用 skill 但 prompt 里找不到 reason: "..." 字面量');
    }
  }
  console.log(`  ${id}: 发现 ${reasonMatches.length} 个 reason 引用`);
}
|
|
137
|
-
|
|
138
|
-
// ── Check 5: every referenced skill actually exists ──────
console.log('\n-- Check 5: Skill 引用有效性 --');
let phaseSkillCount = 0;
for (const phase of getAllPhases()) {
  const { skillId } = phase;
  if (!skillId) continue; // phases without a skill are not counted

  phaseSkillCount += 1;
  if (VALID_SKILLS.has(skillId)) continue;

  record(
    `phase:${phase.workflow}/${phase.id}`,
    'error',
    `skillId "${skillId}" 不在 OFFICIAL_SKILL_KEYWORDS`,
  );
}
console.log(`  共 ${getAllPhases().length} 个 phase,其中 ${phaseSkillCount} 个引用 skill`);
|
|
151
|
-
|
|
152
|
-
// ── Check 6: artifact path templates look reasonable ─────
console.log('\n-- Check 6: artifact 路径模板 --');
for (const phase of getAllPhases()) {
  const hint = phase.artifactHint;
  if (!hint) continue;

  const label = `phase:${phase.workflow}/${phase.id}`;

  // The path is expected to live under docs/ (or src/, tests/).
  const underKnownRoot = ['docs/', 'src/', 'tests/'].some((root) => hint.startsWith(root));
  if (!underKnownRoot) {
    record(label, 'warning', `artifact 路径 "${hint}" 不在 docs/src/tests 下`);
  }

  // A path without any '<' has no <feature-slug>/<bug-slug> style placeholder.
  if (!hint.includes('<')) {
    record(label, 'info', `artifact 路径 "${hint}" 没有占位符(不可变路径?)`);
  }
}
console.log(`  (已检查所有 phase 的 artifactHint)`);
|
|
169
|
-
|
|
170
|
-
// ── Check 7: presets do not mix phases from different methodologies ──
console.log('\n-- Check 7: 跨方法论组合 --');
for (const preset of METHODOLOGY_PRESETS) {
  // Collect the distinct workflows of the preset's phases. Phase ids that
  // getPhase cannot resolve are skipped here (Check 2 reports them);
  // otherwise `undefined` would count as an extra workflow and produce a
  // spurious cross-methodology finding with an empty entry in the list.
  const workflows = new Set<string>();
  for (const phaseId of preset.phases) {
    const workflow = getPhase(phaseId)?.workflow;
    if (workflow != null) {
      workflows.add(workflow);
    }
  }

  // Official presets are expected to stay within a single methodology.
  if (workflows.size > 1) {
    record(preset.id, 'info',
      `跨方法论组合:${[...workflows].join(', ')}(official 预设建议单方法论)`);
  }
}
console.log(`  (已检查 ${METHODOLOGY_PRESETS.length} 个 preset 的 workflow 一致性)`);
|
|
181
|
-
|
|
182
|
-
// ── Check 8: a phase's skillId is consistent with its workflow ──
console.log('\n-- Check 8: Skill 与 workflow 一致性 --');
for (const phase of getAllPhases()) {
  // Simple heuristic (not a hard rule): a bmad phase should not reference a
  // skill whose id contains "harness".
  const crossesOver = phase.workflow === 'bmad' && phase.skillId?.includes('harness');
  if (!crossesOver) continue;

  record(
    `phase:${phase.workflow}/${phase.id}`,
    'warning',
    `bmad phase 引用 harness skill "${phase.skillId}"`,
  );
}
console.log(`  (已检查所有 phase 的 skill 归属)`);
|
|
195
|
-
|
|
196
|
-
// ── Check 9: buildMethodologyAgent is deterministic ──────
// Every preset is built twice and the generated content compared. Iterating
// over the whole list (instead of indexing element 0) also keeps the script
// from passing `undefined` to the builder when the preset list is empty.
console.log('\n-- Check 9: buildMethodologyAgent 确定性 --');
for (const preset of METHODOLOGY_PRESETS) {
  const firstBuild = buildMethodologyAgent(preset);
  const secondBuild = buildMethodologyAgent(preset);
  if (firstBuild.content !== secondBuild.content) {
    record(preset.id, 'error', 'buildMethodologyAgent 输出不确定');
  } else {
    console.log(`  ✓ ${preset.id} 多次构建结果一致`);
  }
}
|
|
206
|
-
|
|
207
|
-
// ── Check 10: slug placeholders are consistent within one prompt ──
// The "soft suggestion" line in agent-builder hard-codes `<feature-slug>`,
// while the harness phase bodies all use `<bug-slug>`. Mixing both slug names
// in a single prompt can mislead the LLM.
console.log('\n-- Check 10: slug 占位符在 prompt 内部一致 --');
PRESET_IDS.forEach((id) => {
  const agent = findOfficialAgent(id);
  if (!agent) return;

  const { content } = agent;
  const hasFeatureSlug = content.includes('<feature-slug>');
  const hasBugSlug = content.includes('<bug-slug>');
  const warn = (message: string) => record(id, 'warning', message);

  // harness presets are expected to use <bug-slug> only.
  if (id.startsWith('harness-') && hasFeatureSlug) {
    warn(`harness preset 里出现 <feature-slug>(应为 <bug-slug>)— 可能来自 agent-builder 模板硬编码`);
  }
  // bmad presets are expected to use <feature-slug> only.
  if (id.startsWith('bmad-') && hasBugSlug) {
    warn(`bmad preset 里出现 <bug-slug>(应为 <feature-slug>)`);
  }
  // Regardless of the preset family, both placeholders together are flagged.
  if (hasFeatureSlug && hasBugSlug) {
    warn(`prompt 同时出现 <feature-slug> 和 <bug-slug>,会让 LLM 困惑`);
  }
});
|
|
233
|
-
|
|
234
|
-
// ── Report ───────────────────────────────────────────────
// Split the collected findings by severity, print a summary followed by the
// individual findings, and exit non-zero when at least one error was recorded.
const errors = issues.filter(({ severity }) => severity === 'error');
const warnings = issues.filter(({ severity }) => severity === 'warning');
const infos = issues.filter(({ severity }) => severity === 'info');

console.log('\n=== 问题汇总 ===');
console.log(`Errors: ${errors.length}`);
console.log(`Warnings: ${warnings.length}`);
console.log(`Infos: ${infos.length}`);

if (issues.length > 0) {
  console.log('\n=== 问题详情 ===');
  issues.forEach(({ agent, severity, message }) => {
    let icon = 'ℹ';
    if (severity === 'error') {
      icon = '✗';
    } else if (severity === 'warning') {
      icon = '⚠';
    }
    console.log(`${icon} [${agent}] ${message}`);
  });
}

// Warnings and infos never fail the run; only errors do.
const allClear = errors.length === 0;
console.log(`\n=== Result: ${allClear ? 'PASS' : 'FAIL'} ===`);
process.exit(allClear ? 0 : 1);
|