forge-workflow 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/.claude/commands/dev.md +314 -0
  2. package/.claude/commands/plan.md +389 -0
  3. package/.claude/commands/premerge.md +179 -0
  4. package/.claude/commands/research.md +42 -0
  5. package/.claude/commands/review.md +442 -0
  6. package/.claude/commands/rollback.md +721 -0
  7. package/.claude/commands/ship.md +134 -0
  8. package/.claude/commands/sonarcloud.md +152 -0
  9. package/.claude/commands/status.md +77 -0
  10. package/.claude/commands/validate.md +237 -0
  11. package/.claude/commands/verify.md +221 -0
  12. package/.claude/rules/greptile-review-process.md +285 -0
  13. package/.claude/rules/workflow.md +105 -0
  14. package/.claude/scripts/greptile-resolve.sh +526 -0
  15. package/.claude/scripts/load-env.sh +32 -0
  16. package/.forge/hooks/check-tdd.js +240 -0
  17. package/.github/PLUGIN_TEMPLATE.json +32 -0
  18. package/.mcp.json.example +12 -0
  19. package/AGENTS.md +169 -0
  20. package/CLAUDE.md +99 -0
  21. package/LICENSE +21 -0
  22. package/README.md +414 -0
  23. package/bin/forge-cmd.js +313 -0
  24. package/bin/forge-validate.js +303 -0
  25. package/bin/forge.js +4228 -0
  26. package/docs/AGENT_INSTALL_PROMPT.md +342 -0
  27. package/docs/ENHANCED_ONBOARDING.md +602 -0
  28. package/docs/EXAMPLES.md +482 -0
  29. package/docs/GREPTILE_SETUP.md +400 -0
  30. package/docs/MANUAL_REVIEW_GUIDE.md +106 -0
  31. package/docs/ROADMAP.md +359 -0
  32. package/docs/SETUP.md +632 -0
  33. package/docs/TOOLCHAIN.md +849 -0
  34. package/docs/VALIDATION.md +363 -0
  35. package/docs/WORKFLOW.md +400 -0
  36. package/docs/planning/PROGRESS.md +396 -0
  37. package/docs/plans/.gitkeep +0 -0
  38. package/docs/plans/2026-02-27-forge-test-suite-v2-decisions.md +21 -0
  39. package/docs/plans/2026-02-27-forge-test-suite-v2-design.md +362 -0
  40. package/docs/plans/2026-02-27-forge-test-suite-v2-tasks.md +343 -0
  41. package/docs/plans/2026-03-02-superpowers-gaps-decisions.md +26 -0
  42. package/docs/plans/2026-03-02-superpowers-gaps-design.md +239 -0
  43. package/docs/plans/2026-03-02-superpowers-gaps-tasks.md +260 -0
  44. package/docs/plans/2026-03-04-agent-command-parity-design.md +163 -0
  45. package/docs/plans/2026-03-04-verify-worktree-cleanup-decisions.md +7 -0
  46. package/docs/plans/2026-03-04-verify-worktree-cleanup-design.md +165 -0
  47. package/docs/plans/2026-03-05-forge-uto-decisions.md +6 -0
  48. package/docs/plans/2026-03-05-forge-uto-design.md +116 -0
  49. package/docs/plans/2026-03-05-forge-uto-tasks.md +244 -0
  50. package/docs/plans/2026-03-10-command-creator-and-eval-decisions.md +52 -0
  51. package/docs/plans/2026-03-10-command-creator-and-eval-design.md +350 -0
  52. package/docs/plans/2026-03-10-command-creator-and-eval-tasks.md +426 -0
  53. package/docs/plans/2026-03-10-stale-workflow-refs-decisions.md +8 -0
  54. package/docs/plans/2026-03-10-stale-workflow-refs-design.md +80 -0
  55. package/docs/plans/2026-03-10-stale-workflow-refs-tasks.md +90 -0
  56. package/docs/plans/2026-03-14-beads-plan-context-decisions.md +9 -0
  57. package/docs/plans/2026-03-14-beads-plan-context-design.md +171 -0
  58. package/docs/plans/2026-03-14-beads-plan-context-tasks.md +160 -0
  59. package/docs/plans/2026-03-14-skill-eval-loop-decisions.md +33 -0
  60. package/docs/plans/2026-03-14-skill-eval-loop-design.md +118 -0
  61. package/docs/plans/2026-03-14-skill-eval-loop-results.md +78 -0
  62. package/docs/plans/2026-03-14-skill-eval-loop-tasks.md +160 -0
  63. package/docs/plans/2026-03-15-agent-command-parity-v2-decisions.md +11 -0
  64. package/docs/plans/2026-03-15-agent-command-parity-v2-design.md +145 -0
  65. package/docs/plans/2026-03-15-agent-command-parity-v2-tasks.md +211 -0
  66. package/docs/research/TEMPLATE.md +292 -0
  67. package/docs/research/advanced-testing.md +297 -0
  68. package/docs/research/agent-permissions.md +167 -0
  69. package/docs/research/dependency-chain.md +328 -0
  70. package/docs/research/forge-workflow-v2.md +550 -0
  71. package/docs/research/plugin-architecture.md +772 -0
  72. package/docs/research/pr4-cli-automation.md +326 -0
  73. package/docs/research/premerge-verify-restructure.md +205 -0
  74. package/docs/research/skills-restructure.md +508 -0
  75. package/docs/research/sonarcloud-perfection-plan.md +166 -0
  76. package/docs/research/sonarcloud-quality-gate.md +184 -0
  77. package/docs/research/superpowers-integration.md +403 -0
  78. package/docs/research/superpowers.md +319 -0
  79. package/docs/research/test-environment.md +519 -0
  80. package/install.sh +1062 -0
  81. package/lefthook.yml +39 -0
  82. package/lib/agents/README.md +198 -0
  83. package/lib/agents/claude.plugin.json +28 -0
  84. package/lib/agents/cline.plugin.json +22 -0
  85. package/lib/agents/codex.plugin.json +19 -0
  86. package/lib/agents/copilot.plugin.json +24 -0
  87. package/lib/agents/cursor.plugin.json +25 -0
  88. package/lib/agents/kilocode.plugin.json +22 -0
  89. package/lib/agents/opencode.plugin.json +20 -0
  90. package/lib/agents/roo.plugin.json +23 -0
  91. package/lib/agents-config.js +2112 -0
  92. package/lib/commands/dev.js +513 -0
  93. package/lib/commands/plan.js +696 -0
  94. package/lib/commands/recommend.js +119 -0
  95. package/lib/commands/ship.js +377 -0
  96. package/lib/commands/status.js +378 -0
  97. package/lib/commands/validate.js +602 -0
  98. package/lib/context-merge.js +359 -0
  99. package/lib/plugin-catalog.js +360 -0
  100. package/lib/plugin-manager.js +166 -0
  101. package/lib/plugin-recommender.js +141 -0
  102. package/lib/project-discovery.js +491 -0
  103. package/lib/setup.js +118 -0
  104. package/lib/workflow-profiles.js +203 -0
  105. package/package.json +115 -0
@@ -0,0 +1,160 @@
1
+ # Skill Eval Loop — Task List
2
+
3
+ **Design doc**: [2026-03-14-skill-eval-loop-design.md](2026-03-14-skill-eval-loop-design.md)
4
+ **Beads**: forge-1jx
5
+ **Branch**: feat/skill-eval-loop
6
+
7
+ ---
8
+
9
+ ## Task 1: Create evals.json for parallel-web-search
10
+
11
+ **File(s)**: `skills/parallel-web-search/evals/evals.json`
12
+ **What to implement**: Write 12-15 trigger eval queries. Include:
13
+ - 6-8 should-trigger queries (web search, find sources, look up facts, current news)
14
+ - 4-5 should-NOT-trigger queries that test disambiguation against sibling skills:
15
+ - `deep-research` queries (market analysis, comprehensive reports)
16
+ - `web-extract` queries (scrape this URL, extract from page)
17
+ - `data-enrichment` queries (enrich company data, CRM lookup)
18
+ - 2-3 generic should-NOT-trigger queries (code review, fix this bug, etc.)
19
+
20
+ **Format**: JSON array of `{"query": "...", "should_trigger": true/false}`
21
+ **Commit**: `feat(evals): add trigger eval set for parallel-web-search`
22
+
23
+ ---
24
+
25
+ ## Task 2: Create evals.json for parallel-deep-research
26
+
27
+ **File(s)**: `skills/parallel-deep-research/evals/evals.json`
28
+ **What to implement**: Write 12-15 trigger eval queries. Include:
29
+ - 6-8 should-trigger queries (deep analysis, market research, comprehensive report, multi-source synthesis)
30
+ - 4-5 should-NOT-trigger disambiguation queries:
31
+ - `web-search` queries (quick lookup, find a source, current price)
32
+ - `web-extract` queries (scrape URL, extract pricing page)
33
+ - `data-enrichment` queries (enrich entity, structured data)
34
+ - 2-3 generic should-NOT-trigger queries
35
+
36
+ **Format**: JSON array of `{"query": "...", "should_trigger": true/false}`
37
+ **Commit**: `feat(evals): add trigger eval set for parallel-deep-research`
38
+
39
+ ---
40
+
41
+ ## Task 3: Create evals.json for parallel-web-extract
42
+
43
+ **File(s)**: `skills/parallel-web-extract/evals/evals.json`
44
+ **What to implement**: Write 12-15 trigger eval queries. Include:
45
+ - 6-8 should-trigger queries (scrape URL, extract content from page, get pricing from URL, pull docs from site)
46
+ - 4-5 should-NOT-trigger disambiguation queries:
47
+ - `web-search` queries (search for X, find sources)
48
+ - `deep-research` queries (analyze market, research report)
49
+ - `data-enrichment` queries (enrich company)
50
+ - 2-3 generic should-NOT-trigger queries
51
+
52
+ **Format**: JSON array of `{"query": "...", "should_trigger": true/false}`
53
+ **Commit**: `feat(evals): add trigger eval set for parallel-web-extract`
54
+
55
+ ---
56
+
57
+ ## Task 4: Create evals.json for parallel-data-enrichment
58
+
59
+ **File(s)**: `skills/parallel-data-enrichment/evals/evals.json`
60
+ **What to implement**: Write 12-15 trigger eval queries. Include:
61
+ - 6-8 should-trigger queries (enrich company data, CRM enrichment, lead qualification, entity lookup, structured data about company)
62
+ - 4-5 should-NOT-trigger disambiguation queries:
63
+ - `web-search` queries (search for X, find news)
64
+ - `deep-research` queries (market analysis, comprehensive report)
65
+ - `web-extract` queries (scrape this URL)
66
+ - 2-3 generic should-NOT-trigger queries
67
+
68
+ **Format**: JSON array of `{"query": "...", "should_trigger": true/false}`
69
+ **Commit**: `feat(evals): add trigger eval set for parallel-data-enrichment`
70
+
71
+ ---
72
+
73
+ ## Task 5: Create evals.json for citation-standards
74
+
75
+ **File(s)**: `skills/citation-standards/evals/evals.json`
76
+ **What to implement**: Write 10-12 trigger eval queries. Include:
77
+ - 5-6 should-trigger queries (write research doc, add citations, format sources, reference external source in docs/research/)
78
+ - 5-6 should-NOT-trigger queries (write code, fix bug, run tests, deploy, general web search, scrape URL)
79
+
80
+ **Format**: JSON array of `{"query": "...", "should_trigger": true/false}`
81
+ **Commit**: `feat(evals): add trigger eval set for citation-standards`
82
+
83
+ ---
84
+
85
+ ## Task 6: Create evals.json for sonarcloud-analysis
86
+
87
+ **File(s)**: `skills/sonarcloud-analysis/evals/evals.json`
88
+ **What to implement**: Write 10-12 trigger eval queries. Include:
89
+ - 5-6 should-trigger queries (check code quality, SonarCloud issues, security vulnerabilities, test coverage, quality gate status)
90
+ - 5-6 should-NOT-trigger queries (write code, deploy, web search, research, format citations)
91
+
92
+ **Format**: JSON array of `{"query": "...", "should_trigger": true/false}`
93
+ **Commit**: `feat(evals): add trigger eval set for sonarcloud-analysis`
94
+
95
+ ---
96
+
97
+ ## Task 7: Run skill-creator eval loop — Batch 1a (web-search + web-extract)
98
+
99
+ **What to do**: Invoke the `skill-creator` skill for trigger accuracy optimization on:
100
+ 1. `parallel-web-search`
101
+ 2. `parallel-web-extract`
102
+
103
+ Run both in parallel. The skill-creator handles: baseline measurement → train/test split → description improvement → re-eval → up to 5 iterations → benchmark generation.
104
+
105
+ **Expected output**: Before/after trigger rates, improved descriptions (if needed), benchmark reports.
106
+ **Commit**: `feat(skills): optimize trigger descriptions for web-search and web-extract`
107
+
108
+ ---
109
+
110
+ ## Task 8: Run skill-creator eval loop — Batch 1b (deep-research + data-enrichment)
111
+
112
+ **What to do**: Same as Task 7 but for:
113
+ 1. `parallel-deep-research`
114
+ 2. `parallel-data-enrichment`
115
+
116
+ **Expected output**: Before/after trigger rates, improved descriptions, benchmark reports.
117
+ **Commit**: `feat(skills): optimize trigger descriptions for deep-research and data-enrichment`
118
+
119
+ ---
120
+
121
+ ## Task 9: Run skill-creator eval loop — Batch 2 (citation-standards + sonarcloud-analysis)
122
+
123
+ **What to do**: Same as Task 7 but for:
124
+ 1. `citation-standards`
125
+ 2. `sonarcloud-analysis`
126
+
127
+ **Expected output**: Before/after trigger rates, improved descriptions, benchmark reports.
128
+ **Commit**: `feat(skills): optimize trigger descriptions for citation-standards and sonarcloud-analysis`
129
+
130
+ ---
131
+
132
+ ## Task 10: Cross-skill regression check
133
+
134
+ **What to do**: After all descriptions are optimized, run a final cross-skill check:
135
+ - Take the disambiguation queries from Tasks 1-4 (should-NOT-trigger for sibling skills)
136
+ - Verify the optimized descriptions don't cause false-positive triggers on sibling skills
137
+ - If regressions are found: **PAUSE and ask the user** (per ambiguity policy)
138
+
139
+ **Expected output**: Cross-skill trigger matrix showing each query vs. each Parallel AI skill.
140
+ **Commit**: `docs: add cross-skill trigger matrix`
141
+
142
+ ---
143
+
144
+ ## Task 11: Commit improved descriptions and before/after summary
145
+
146
+ **What to do**:
147
+ - Commit any SKILL.md description changes from the eval loops
148
+ - Create a summary document with before/after trigger rates for all 6 skills
149
+ - Save to `docs/plans/2026-03-14-skill-eval-loop-results.md`
150
+
151
+ **Commit**: `docs: add skill eval loop results summary`
152
+
153
+ ---
154
+
155
+ ## Ordering rationale
156
+
157
+ 1. Tasks 1-6 (eval sets) are independent — can be parallelized
158
+ 2. Tasks 7-9 (eval loops) depend on eval sets and run in batches of 2
159
+ 3. Task 10 (cross-check) depends on all loops completing
160
+ 4. Task 11 (summary) depends on everything
@@ -0,0 +1,11 @@
1
+ # Decisions Log: Agent Command Parity — Cleanup & Completion
2
+
3
+ - **Feature**: agent-command-parity-v2
4
+ - **Date**: 2026-03-15
5
+ - **Beads**: forge-2w3
6
+
7
+ ---
8
+
9
+ No decision gates fired during /dev. All tasks executed cleanly per design doc.
10
+
11
+ **Decision gate count: 0** (plan quality: Excellent)
@@ -0,0 +1,145 @@
1
+ # Design: Agent Command Parity — Cleanup & Completion
2
+
3
+ - **Slug**: agent-command-parity-v2
4
+ - **Date**: 2026-03-15
5
+ - **Status**: Complete
6
+ - **Beads**: forge-2w3
7
+ - **Supersedes**: docs/plans/2026-03-04-agent-command-parity-design.md (original, partially completed across PRs #52-#58)
8
+
9
+ ---
10
+
11
+ ## Purpose
12
+
13
+ Close out forge-2w3 by cleaning up all dropped-agent debris and completing the two remaining deliverables (plugin catalog fix + `forge check-agents` CLI). PRs #54-#58 delivered the command sync infrastructure and generated all 77 command files, but left behind stale references to 4 dropped agents (Antigravity, Windsurf, Aider, Continue) and never updated the plugin catalog or built the CLI validator.
14
+
15
+ ---
16
+
17
+ ## Success Criteria
18
+
19
+ 1. Zero references to dropped agents (Antigravity, Windsurf, Aider, Continue) in active code, config, or docs
20
+ 2. Plugin catalog (`lib/agents/*.plugin.json`) has correct capability flags for all 8 supported agents
21
+ 3. `forge check-agents` CLI command validates all agent configs are complete and consistent
22
+ 4. `node scripts/sync-commands.js --check` still passes
23
+ 5. All existing tests pass; new tests cover plugin catalog and check-agents
24
+ 6. Design doc status updated, forge-2w3 closeable
25
+
26
+ ---
27
+
28
+ ## Out of Scope
29
+
30
+ - Adding new agents
31
+ - Changing the sync script or adapter transforms (working correctly)
32
+ - Hooks support for any agent
33
+ - Rewriting docs/EXAMPLES.md examples 1-3,5 (just fix `/research` → `/plan` references)
34
+
35
+ ---
36
+
37
+ ## Approach Selected
38
+
39
+ **Mechanical cleanup + two small features.** No architecture changes. Three work streams:
40
+
41
+ 1. **Dropped-agent cleanup** — delete files, remove code paths, clean references
42
+ 2. **Plugin catalog fix** — update capability flags and directories for 6 plugins
43
+ 3. **`forge check-agents` CLI** — new command that reads plugin catalog + checks files exist
44
+
45
+ ---
46
+
47
+ ## Constraints
48
+
49
+ - Must not break `forge setup` for any of the 8 supported agents
50
+ - Research docs that are fundamentally about dropped agents → delete entire file
51
+ - Research docs with minor dropped-agent mentions → fix the references, keep the doc
52
+ - Examples fundamentally built on OpenSpec → delete the example
53
+ - Examples with minor `/research` stage references → fix to `/plan`
54
+ - Gitignore entries for dropped agents → remove (cleaner repo)
55
+
56
+ ---
57
+
58
+ ## Edge Cases
59
+
60
+ 1. **`.agents/skills/` has skill files** — orphaned, gitignored, no plugin references it. Safe to delete from disk (not tracked in git).
61
+ 2. **`.agent/` directory empty but exists** — gitignored, safe to delete from disk.
62
+ 3. **`.aider.conf.yml` is git-tracked** — must `git rm`, not just delete.
63
+ 4. **`lib/agents/continue.plugin.json` is git-tracked** — must `git rm`.
64
+ 5. **`bin/forge.js` has Continue setup function** — ~40 lines of dead code (generateContinueConfig, continueFormat logic). Remove entirely.
65
+ 6. **`packages/skills/src/lib/agents.js`** — lists all 4 dropped agents as enabled. Remove entries + update tests.
66
+ 7. **`openspec/` directory** — removed from forge.js in PR #54 but directory still exists. Check if git-tracked.
67
+ 8. **`package.json` description** — says "9-stage" and lists all dropped agents. Fix to "7-stage" with only 8 supported agents.
68
+
69
+ ---
70
+
71
+ ## Ambiguity Policy
72
+
73
+ Make a conservative choice and document it in the decisions log. Only pause for user input if the change could break `forge setup` or delete something that might be intentionally kept.
74
+
75
+ ---
76
+
77
+ ## Technical Research
78
+
79
+ ### Blast-Radius Search Results (Dropped Agents)
80
+
81
+ Complete inventory of every file referencing dropped agents:
82
+
83
+ #### Files to DELETE entirely:
84
+ | File | Reason |
85
+ |------|--------|
86
+ | `.aider.conf.yml` | Aider config, git-tracked |
87
+ | `lib/agents/continue.plugin.json` | Continue plugin, git-tracked |
88
+ | `docs/research/agent-instructions-sync.md` | Entirely about syncing GEMINI.md/.windsurfrules — obsolete approach |
89
+ | `docs/README-v1.3.md` | Frozen v1.3 snapshot, Antigravity/Windsurf/Continue throughout, misleading |
90
+
91
+ #### Files to EDIT (remove dropped references):
92
+ | File | What to fix |
93
+ |------|-------------|
94
+ | `package.json` | description: "7-stage", remove windsurf/aider/continue/antigravity from keywords |
95
+ | `packages/skills/src/lib/agents.js` | Remove aider, antigravity, continue, windsurf entries |
96
+ | `packages/skills/src/commands/sync.js` | Remove Aider config update logic, fix help text |
97
+ | `packages/skills/test/agents.test.js` | Remove Aider/Continue detection tests |
98
+ | `packages/skills/test/sync.test.js` | Remove Aider sync test |
99
+ | `bin/forge.js` | Remove Continue setup (~lines 1679, 1921, 1998-2020, 2067), continueFormat logic |
100
+ | `bin/forge-cmd.js` | Remove "OpenSpec" from plan description |
101
+ | `lib/project-discovery.js` | Remove Aider detection |
102
+ | `lib/agents/README.md` | Remove Windsurf, Antigravity, Aider rows |
103
+ | `docs/TOOLCHAIN.md` | Remove Windsurf mention, Continue MCP setup |
104
+ | `docs/EXAMPLES.md` | Delete Example 4 (OpenSpec-based), fix `/research` → `/plan` in examples 1,2,3,5 |
105
+ | `docs/AGENT_INSTALL_PROMPT.md` | Remove Continue detection |
106
+ | `docs/research/agent-permissions.md` | Remove Antigravity/Aider rows from tables |
107
+ | `docs/research/dependency-chain.md` | Fix 1 Continue reference |
108
+ | `docs/research/test-environment.md` | Fix 1 Continue reference |
109
+ | `CLAUDE.md` | Remove Continue MCP reference |
110
+ | `QUICKSTART.md` | Remove Windsurf from examples |
111
+ | `.forge/pr-body.md` | Remove Aider, Antigravity references |
112
+ | `.gitignore` | Remove `.agents/`, `.agent/`, `.aider/skills/`, `.continue/skills/`, `.windsurf/skills/` entries |
113
+ | `test-env/validation/agent-validator.test.js` | Remove aider from list |
114
+
115
+ #### Untracked dirs to delete from disk:
116
+ | Directory | Reason |
117
+ |-----------|--------|
118
+ | `.agent/` | Antigravity, empty, gitignored |
119
+ | `.agents/` | Antigravity shared skills, orphaned, gitignored |
120
+
121
+ #### Plugin catalog fixes (kept agents, wrong flags):
122
+ | Plugin | Changes |
123
+ |--------|---------|
124
+ | `cursor.plugin.json` | `commands: true`, add `"commands": ".cursor/commands"` to directories |
125
+ | `cline.plugin.json` | `commands: true`, add `"workflows": ".clinerules/workflows"` to directories |
126
+ | `copilot.plugin.json` | `commands: true` |
127
+ | `kilocode.plugin.json` | `commands: true` |
128
+ | `codex.plugin.json` | `commands: true`, add `"skills": ".codex/skills"` to directories |
129
+ | `claude.plugin.json` | `hooks: true` (has `.claude/settings.json` hooks) |
130
+
131
+ ### OWASP Top 10 Analysis
132
+
133
+ This feature deletes files and updates config/metadata. No user input processing, no auth, no network calls.
134
+
135
+ - **A01-A10**: Not applicable — purely static file cleanup and metadata correction.
136
+
137
+ ### TDD Test Scenarios
138
+
139
+ 1. **Plugin catalog**: Each supported agent's plugin.json has `commands: true` and correct directory path
140
+ 2. **No dropped agents in plugin catalog**: `continue.plugin.json` should not exist; no plugin has id matching dropped agents
141
+ 3. **`forge check-agents` happy path**: All 8 agent dirs populated → exits 0
142
+ 4. **`forge check-agents` missing file**: Remove one command file → exits non-zero, reports which file/agent
143
+ 5. **`forge check-agents` uses sync --check**: Delegates to existing sync infrastructure rather than reimplementing
144
+ 6. **Dropped-agent code removal**: `packages/skills/src/lib/agents.js` should not contain aider/antigravity/continue/windsurf
145
+ 7. **package.json accuracy**: Description says "7-stage", keywords don't include dropped agents
@@ -0,0 +1,211 @@
1
+ # Task List: Agent Command Parity — Cleanup & Completion
2
+
3
+ - **Feature**: agent-command-parity-v2
4
+ - **Date**: 2026-03-15
5
+ - **Beads**: forge-2w3
6
+ - **Branch**: feat/agent-command-parity
7
+ - **Worktree**: .worktrees/agent-command-parity
8
+ - **Design doc**: docs/plans/2026-03-15-agent-command-parity-v2-design.md
9
+
10
+ ---
11
+
12
+ ## Task 1: Delete dropped-agent files
13
+
14
+ **File(s)**:
15
+ - `.aider.conf.yml` (git rm)
16
+ - `lib/agents/continue.plugin.json` (git rm)
17
+ - `docs/research/agent-instructions-sync.md` (git rm)
18
+ - `docs/README-v1.3.md` (git rm)
19
+ - `.agent/` (rm -rf, untracked)
20
+ - `.agents/` (rm -rf, untracked)
21
+
22
+ **What to implement**: Remove all files that are entirely about dropped agents. Git-tracked files use `git rm`. Untracked/gitignored directories use `rm -rf`.
23
+
24
+ **TDD steps**:
25
+ 1. Write test: `test/cleanup/dropped-agent-files.test.js` — assert none of these files/dirs exist
26
+ 2. Run test: fails (files still exist)
27
+ 3. Implement: `git rm` tracked files, `rm -rf` untracked dirs
28
+ 4. Run test: passes
29
+ 5. Commit: `fix: remove dropped-agent files — aider config, continue plugin, stale research docs`
30
+
31
+ **Expected output**: All 6 paths gone.
32
+
33
+ ---
34
+
35
+ ## Task 2: Remove dropped-agent code from packages/skills
36
+
37
+ **File(s)**:
38
+ - `packages/skills/src/lib/agents.js` — remove aider, antigravity, continue, windsurf entries
39
+ - `packages/skills/src/commands/sync.js` — remove Aider updateAiderConfig(), fix help text
40
+ - `packages/skills/test/agents.test.js` — remove Aider/Continue detection tests
41
+ - `packages/skills/test/sync.test.js` — remove Aider sync test
42
+
43
+ **What to implement**: Remove all code paths, agent entries, and tests for the 4 dropped agents from the skills package.
44
+
45
+ **TDD steps**:
46
+ 1. Write test: `test/cleanup/dropped-agent-code.test.js` — grep these files for dropped agent names, assert zero matches
47
+ 2. Run test: fails (references still exist)
48
+ 3. Implement: edit all 4 files
49
+ 4. Run test: passes
50
+ 5. Run existing test suite: `bun test` — verify no regressions
51
+ 6. Commit: `fix: remove dropped-agent code from skills package — aider, antigravity, continue, windsurf`
52
+
53
+ **Expected output**: No dropped-agent names in skills package source or tests.
54
+
55
+ ---
56
+
57
+ ## Task 3: Remove dropped-agent code from bin/forge.js and lib/
58
+
59
+ **File(s)**:
60
+ - `bin/forge.js` — remove Continue setup function (~40 lines), continueFormat references
61
+ - `bin/forge-cmd.js` — remove "OpenSpec" from plan description
62
+ - `lib/project-discovery.js` — remove Aider detection logic
63
+
64
+ **What to implement**: Remove all dead code paths for dropped agents from the CLI and lib modules.
65
+
66
+ **TDD steps**:
67
+ 1. Write test: `test/cleanup/dropped-agent-cli.test.js` — grep these files for dropped agent names, assert zero matches (excluding comments about removal)
68
+ 2. Run test: fails
69
+ 3. Implement: edit all 3 files
70
+ 4. Run test: passes
71
+ 5. Run `bun test` — verify no regressions
72
+ 6. Commit: `fix: remove dropped-agent code from CLI — continue setup, aider detection, openspec ref`
73
+
74
+ **Expected output**: No dropped-agent function calls in CLI code.
75
+
76
+ ---
77
+
78
+ ## Task 4: Clean dropped-agent references from docs
79
+
80
+ **File(s)**:
81
+ - `docs/EXAMPLES.md` — delete Example 4 (OpenSpec-based), fix `/research` → `/plan` in examples 1,2,3,5
82
+ - `docs/TOOLCHAIN.md` — remove Windsurf mention, Continue MCP setup
83
+ - `docs/AGENT_INSTALL_PROMPT.md` — remove Continue detection
84
+ - `docs/research/agent-permissions.md` — remove Antigravity/Aider rows from tables
85
+ - `docs/research/dependency-chain.md` — fix 1 Continue reference
86
+ - `docs/research/test-environment.md` — fix 1 Continue reference
87
+ - `lib/agents/README.md` — remove Windsurf, Antigravity, Aider rows
88
+
89
+ **What to implement**: Fix each doc per the design doc rules — delete sections that are fundamentally about dropped agents, fix minor references in docs that are otherwise valid.
90
+
91
+ **TDD steps**:
92
+ 1. Write test: `test/cleanup/dropped-agent-docs.test.js` — grep all doc files for dropped agent names, assert zero matches (allow historical mentions in design docs and CHANGELOG)
93
+ 2. Run test: fails
94
+ 3. Implement: edit all files
95
+ 4. Run test: passes
96
+ 5. Commit: `docs: remove dropped-agent references from docs — antigravity, windsurf, aider, continue`
97
+
98
+ **Expected output**: No misleading dropped-agent references in active docs.
99
+
100
+ ---
101
+
102
+ ## Task 5: Clean package.json, CLAUDE.md, QUICKSTART.md, .gitignore, .forge/
103
+
104
+ **File(s)**:
105
+ - `package.json` — description: "7-stage" + 8 agents only, remove dropped keywords
106
+ - `CLAUDE.md` — remove Continue MCP reference
107
+ - `QUICKSTART.md` — remove Windsurf from examples
108
+ - `.gitignore` — remove `.agents/`, `.agent/`, `.aider/skills/`, `.continue/skills/`, `.windsurf/skills/`
109
+ - `.forge/pr-body.md` — remove Aider, Antigravity references
110
+ - `test-env/validation/agent-validator.test.js` — remove aider from list
111
+
112
+ **What to implement**: Fix all remaining config and metadata files.
113
+
114
+ **TDD steps**:
115
+ 1. Write test: `test/cleanup/dropped-agent-config.test.js` — assert package.json description says "7-stage", keywords don't include dropped agents, .gitignore doesn't have dropped entries
116
+ 2. Run test: fails
117
+ 3. Implement: edit all files
118
+ 4. Run test: passes
119
+ 5. Commit: `fix: clean dropped-agent refs from package.json, gitignore, quickstart, config`
120
+
121
+ **Expected output**: All metadata accurate. `bun test` passes.
122
+
123
+ ---
124
+
125
+ ## Task 6: Fix plugin catalog capability flags
126
+
127
+ **File(s)**:
128
+ - `lib/agents/cursor.plugin.json` — `commands: true`, add `"commands": ".cursor/commands"` to directories
129
+ - `lib/agents/cline.plugin.json` — `commands: true`, add `"workflows": ".clinerules/workflows"` to directories
130
+ - `lib/agents/copilot.plugin.json` — `commands: true`
131
+ - `lib/agents/kilocode.plugin.json` — `commands: true`
132
+ - `lib/agents/codex.plugin.json` — `commands: true`, add `"skills": ".codex/skills"` to directories
133
+ - `lib/agents/claude.plugin.json` — `hooks: true`
134
+
135
+ **What to implement**: Update each plugin.json to reflect actual capabilities. These flags affect `forge setup` output.
136
+
137
+ **TDD steps**:
138
+ 1. Write test: `test/cleanup/plugin-catalog.test.js` — assert each supported agent has `commands: true`, correct directories, no `continue` plugin exists
139
+ 2. Run test: fails
140
+ 3. Implement: edit 6 plugin.json files
141
+ 4. Run test: passes
142
+ 5. Run `bun test` — verify no regressions (existing plugin tests may need updates)
143
+ 6. Commit: `fix: update plugin catalog — correct capability flags for all 8 supported agents`
144
+
145
+ **Expected output**: All 8 plugins have accurate capability flags.
146
+
147
+ ---
148
+
149
+ ## Task 7: Build `forge check-agents` CLI command
150
+
151
+ **File(s)**:
152
+ - `scripts/check-agents.js` (new) — delegates to `sync-commands.js --check` + validates plugin catalog
153
+ - `test/scripts/check-agents.test.js` (new)
154
+
155
+ **What to implement**: CLI command that:
156
+ 1. Runs `syncCommands({ check: true })` to verify all agent command files are in sync
157
+ 2. Reads `lib/agents/*.plugin.json` to verify each agent with `commands: true` has its command directory populated
158
+ 3. Reports: missing files, out-of-sync files, stale files
159
+ 4. Exits 0 if all clean, non-zero if issues found
160
+
161
+ Keep it simple — reuse the existing sync infrastructure rather than reimplementing file checks.
162
+
163
+ **TDD steps**:
164
+ 1. Write test: `test/scripts/check-agents.test.js` — happy path (all files present → exit 0), missing file (→ exit non-zero), stale file detection
165
+ 2. Run test: fails (file doesn't exist)
166
+ 3. Implement: `scripts/check-agents.js`
167
+ 4. Run test: passes
168
+ 5. Run manually: `node scripts/check-agents.js` → "All agent command files are in sync."
169
+ 6. Commit: `feat: add forge check-agents CLI — validates agent configs are complete and in sync`
170
+
171
+ **Expected output**: `node scripts/check-agents.js` exits 0 on current repo.
172
+
173
+ ---
174
+
175
+ ## Task 8: Integration validation + design doc update
176
+
177
+ **File(s)**: No new files.
178
+
179
+ **What to implement**:
180
+ 1. Run full test suite: `bun test` — all pass
181
+ 2. Run `node scripts/sync-commands.js --check` — in sync
182
+ 3. Run `node scripts/check-agents.js` — all clean
183
+ 4. Grep entire codebase for dropped agent names — zero hits in active code/docs
184
+ 5. Update design doc status: "Active" → "Complete"
185
+ 6. Update original design doc (2026-03-04) status: "Draft" → "Superseded by 2026-03-15-agent-command-parity-v2-design.md"
186
+
187
+ **TDD steps**:
188
+ 1. Run all checks listed above
189
+ 2. Verify each passes
190
+ 3. Commit: `docs: mark agent-command-parity design docs as complete`
191
+
192
+ **Expected output**: All green. forge-2w3 ready to close after merge.
193
+
194
+ ---
195
+
196
+ ## Ordering Summary
197
+
198
+ | # | Task | Blocked by | Parallelizable |
199
+ |---|------|--------|----------------|
200
+ | 1 | Delete dropped-agent files | — | Yes (with 2-6) |
201
+ | 2 | Remove dropped code from skills package | — | Yes (with 1,3-6) |
202
+ | 3 | Remove dropped code from CLI/lib | — | Yes (with 1,2,4-6) |
203
+ | 4 | Clean docs | — | Yes (with 1-3,5,6) |
204
+ | 5 | Clean config/metadata | — | Yes (with 1-4,6) |
205
+ | 6 | Fix plugin catalog | — | Yes (with 1-5) |
206
+ | 7 | Build check-agents CLI | 6 (reads plugin catalog) | After task 6 |
207
+ | 8 | Integration validation | All above | Last |
208
+
209
+ Tasks 1-6 are independent and can be parallelized.
210
+ Task 7 depends on task 6 (needs correct plugin catalog).
211
+ Task 8 is the final integration check.