workspace-maxxing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
  2. package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
  3. package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
  4. package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
  5. package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
  6. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
  7. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
  8. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
  9. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
  10. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
  11. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
  12. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
  13. package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
  14. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
  15. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
  16. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
  17. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
  18. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
  19. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
  20. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
  21. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
  22. package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
  23. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
  24. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
  25. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
  26. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
  27. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
  28. package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
  29. package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
  30. package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
  31. package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
  32. package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
  33. package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
  34. package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
  35. package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
  36. package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
  37. package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
  38. package/README.md +144 -0
  39. package/dist/agent-creator.d.ts +9 -0
  40. package/dist/agent-creator.d.ts.map +1 -0
  41. package/dist/agent-creator.js +199 -0
  42. package/dist/agent-creator.js.map +1 -0
  43. package/dist/agent-iterator.d.ts +38 -0
  44. package/dist/agent-iterator.d.ts.map +1 -0
  45. package/dist/agent-iterator.js +327 -0
  46. package/dist/agent-iterator.js.map +1 -0
  47. package/dist/index.d.ts +3 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +197 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/install.d.ts +18 -0
  52. package/dist/install.d.ts.map +1 -0
  53. package/dist/install.js +117 -0
  54. package/dist/install.js.map +1 -0
  55. package/dist/platforms/claude.d.ts +7 -0
  56. package/dist/platforms/claude.d.ts.map +1 -0
  57. package/dist/platforms/claude.js +70 -0
  58. package/dist/platforms/claude.js.map +1 -0
  59. package/dist/platforms/copilot.d.ts +7 -0
  60. package/dist/platforms/copilot.d.ts.map +1 -0
  61. package/dist/platforms/copilot.js +75 -0
  62. package/dist/platforms/copilot.js.map +1 -0
  63. package/dist/platforms/gemini.d.ts +7 -0
  64. package/dist/platforms/gemini.d.ts.map +1 -0
  65. package/dist/platforms/gemini.js +81 -0
  66. package/dist/platforms/gemini.js.map +1 -0
  67. package/dist/platforms/index.d.ts +8 -0
  68. package/dist/platforms/index.d.ts.map +1 -0
  69. package/dist/platforms/index.js +41 -0
  70. package/dist/platforms/index.js.map +1 -0
  71. package/dist/platforms/opencode.d.ts +7 -0
  72. package/dist/platforms/opencode.d.ts.map +1 -0
  73. package/dist/platforms/opencode.js +70 -0
  74. package/dist/platforms/opencode.js.map +1 -0
  75. package/dist/scripts/benchmark.d.ts +20 -0
  76. package/dist/scripts/benchmark.d.ts.map +1 -0
  77. package/dist/scripts/benchmark.js +170 -0
  78. package/dist/scripts/benchmark.js.map +1 -0
  79. package/dist/scripts/dispatch.d.ts +32 -0
  80. package/dist/scripts/dispatch.d.ts.map +1 -0
  81. package/dist/scripts/dispatch.js +386 -0
  82. package/dist/scripts/dispatch.js.map +1 -0
  83. package/dist/scripts/generate-tests.d.ts +11 -0
  84. package/dist/scripts/generate-tests.d.ts.map +1 -0
  85. package/dist/scripts/generate-tests.js +118 -0
  86. package/dist/scripts/generate-tests.js.map +1 -0
  87. package/dist/scripts/install-tool.d.ts +8 -0
  88. package/dist/scripts/install-tool.d.ts.map +1 -0
  89. package/dist/scripts/install-tool.js +98 -0
  90. package/dist/scripts/install-tool.js.map +1 -0
  91. package/dist/scripts/iterate.d.ts +44 -0
  92. package/dist/scripts/iterate.d.ts.map +1 -0
  93. package/dist/scripts/iterate.js +260 -0
  94. package/dist/scripts/iterate.js.map +1 -0
  95. package/dist/scripts/orchestrator.d.ts +40 -0
  96. package/dist/scripts/orchestrator.d.ts.map +1 -0
  97. package/dist/scripts/orchestrator.js +378 -0
  98. package/dist/scripts/orchestrator.js.map +1 -0
  99. package/dist/scripts/scaffold.d.ts +8 -0
  100. package/dist/scripts/scaffold.d.ts.map +1 -0
  101. package/dist/scripts/scaffold.js +279 -0
  102. package/dist/scripts/scaffold.js.map +1 -0
  103. package/dist/scripts/validate.d.ts +11 -0
  104. package/dist/scripts/validate.d.ts.map +1 -0
  105. package/dist/scripts/validate.js +472 -0
  106. package/dist/scripts/validate.js.map +1 -0
  107. package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
  108. package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
  109. package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
  110. package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
  111. package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
  112. package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
  113. package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
  114. package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
  115. package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
  116. package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
  117. package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
  118. package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
  119. package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
  120. package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
  121. package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
  122. package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
  123. package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
  124. package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
  125. package/jest.config.js +8 -0
  126. package/package.json +32 -0
  127. package/src/agent-creator.ts +180 -0
  128. package/src/agent-iterator.ts +397 -0
  129. package/src/index.ts +189 -0
  130. package/src/install.ts +105 -0
  131. package/src/platforms/claude.ts +40 -0
  132. package/src/platforms/copilot.ts +50 -0
  133. package/src/platforms/gemini.ts +55 -0
  134. package/src/platforms/index.ts +45 -0
  135. package/src/platforms/opencode.ts +41 -0
  136. package/src/scripts/benchmark.ts +171 -0
  137. package/src/scripts/dispatch.ts +473 -0
  138. package/src/scripts/generate-tests.ts +112 -0
  139. package/src/scripts/install-tool.ts +82 -0
  140. package/src/scripts/iterate.ts +271 -0
  141. package/src/scripts/orchestrator.ts +539 -0
  142. package/src/scripts/scaffold.ts +282 -0
  143. package/src/scripts/validate.ts +516 -0
  144. package/templates/.workspace-templates/CONTEXT.md +44 -0
  145. package/templates/.workspace-templates/SYSTEM.md +44 -0
  146. package/templates/.workspace-templates/references/anti-patterns.md +16 -0
  147. package/templates/.workspace-templates/references/iron-laws.md +26 -0
  148. package/templates/.workspace-templates/references/reporting-format.md +52 -0
  149. package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
  150. package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
  151. package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
  152. package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
  153. package/templates/.workspace-templates/scripts/iterate.ts +265 -0
  154. package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
  155. package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
  156. package/templates/.workspace-templates/scripts/validate.ts +452 -0
  157. package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
  158. package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
  159. package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
  160. package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
  161. package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
  162. package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
  163. package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
  164. package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
  165. package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
  166. package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
  167. package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
  168. package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
  169. package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
  170. package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
  171. package/templates/.workspace-templates/workspace/README.md +14 -0
  172. package/templates/SKILL.md +347 -0
  173. package/tests/benchmark.test.ts +158 -0
  174. package/tests/cli.test.ts +109 -0
  175. package/tests/dispatch-parallel.test.ts +124 -0
  176. package/tests/dispatch.test.ts +218 -0
  177. package/tests/fixer-skill.test.ts +203 -0
  178. package/tests/generate-tests.test.ts +101 -0
  179. package/tests/install-tool.test.ts +141 -0
  180. package/tests/install.test.ts +144 -0
  181. package/tests/integration.test.ts +324 -0
  182. package/tests/iterate.test.ts +219 -0
  183. package/tests/orchestrator.test.ts +710 -0
  184. package/tests/scaffold.test.ts +238 -0
  185. package/tests/templates-enhanced.test.ts +208 -0
  186. package/tests/templates.test.ts +219 -0
  187. package/tests/validate.test.ts +421 -0
  188. package/tests/validation-enhanced.test.ts +303 -0
  189. package/tests/worker-skill.test.ts +88 -0
  190. package/tsconfig.json +19 -0
  191. package/workspace/00-meta/CONTEXT.md +3 -0
  192. package/workspace/00-meta/execution-log.md +17 -0
  193. package/workspace/00-meta/tools.md +11 -0
  194. package/workspace/01-input/CONTEXT.md +27 -0
  195. package/workspace/CONTEXT.md +35 -0
  196. package/workspace/README.md +14 -0
  197. package/workspace/SYSTEM.md +36 -0
  198. package/workspace-maxxing-0.1.0.tgz +0 -0
@@ -0,0 +1,854 @@
1
+ # Benchmarking & Multi-Agent Support Implementation Plan
2
+
3
+ > **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
4
+
5
+ **Goal:** Add weighted benchmark scoring, multi-agent CLI installation targeting, guided iteration reports, and console+JSON benchmark output.
6
+
7
+ **Architecture:** New `benchmark.ts` script for weighted scoring, enhanced `install.ts` with agent-targeting flags, extended `iterate.ts` with benchmark data in return values, and updated `index.ts` with new CLI flags.
8
+
9
+ **Tech Stack:** TypeScript, Node.js builtins only (fs, path, process), Jest for testing.
10
+
11
+ ---
12
+
13
+ ### Task 1: Create `src/scripts/benchmark.ts` — Weighted Scoring Engine
14
+
15
+ **Files:**
16
+ - Create: `src/scripts/benchmark.ts`
17
+ - Test: `tests/benchmark.test.ts`
18
+
19
+ - [ ] **Step 1: Write tests for weighted scoring**
20
+
21
+ ```typescript
22
+ // tests/benchmark.test.ts
23
+ import * as fs from 'fs';
24
+ import * as path from 'path';
25
+ import { calculateBenchmark, formatBenchmarkTable, BenchmarkResult } from '../src/scripts/benchmark';
26
+
27
+ // Mock fs and path
28
+ jest.mock('fs');
29
+ jest.mock('path');
30
+
31
+ const mockFs = fs as jest.Mocked<typeof fs>;
32
+ const mockPath = path as jest.Mocked<typeof path>;
33
+
34
+ describe('calculateBenchmark', () => {
35
+ beforeEach(() => {
36
+ jest.clearAllMocks();
37
+ mockPath.resolve.mockImplementation((p: string) => p);
38
+ mockPath.join.mockImplementation((...args: string[]) => args.join('/'));
39
+ });
40
+
41
+ it('returns weighted scores for a workspace with all stages', () => {
42
+ mockFs.existsSync.mockReturnValue(true);
43
+ mockFs.readdirSync.mockReturnValue([
44
+ { name: '00-meta', isDirectory: () => true },
45
+ { name: '01-ideation', isDirectory: () => true },
46
+ { name: '02-research', isDirectory: () => true },
47
+ { name: '03-architecture', isDirectory: () => true },
48
+ ] as fs.Dirent[]);
49
+ mockFs.readFileSync.mockReturnValue('purpose: test\ninput: none\noutput: test\ndependencies: none');
50
+
51
+ const result = calculateBenchmark('/test-workspace');
52
+
53
+ expect(result.stages).toHaveLength(3);
54
+ expect(result.stages[0].name).toBe('01-ideation');
55
+ expect(result.stages[0].weight).toBe(1.5);
56
+ expect(result.stages[1].name).toBe('02-research');
57
+ expect(result.stages[1].weight).toBe(1.3);
58
+ expect(result.stages[2].name).toBe('03-architecture');
59
+ expect(result.stages[2].weight).toBe(1.2);
60
+ });
61
+
62
+ it('excludes missing stages from calculation', () => {
63
+ mockFs.existsSync.mockReturnValue(true);
64
+ mockFs.readdirSync.mockReturnValue([
65
+ { name: '00-meta', isDirectory: () => true },
66
+ { name: '01-ideation', isDirectory: () => true },
67
+ ] as fs.Dirent[]);
68
+ mockFs.readFileSync.mockReturnValue('purpose: test\ninput: none\noutput: test\ndependencies: none');
69
+
70
+ const result = calculateBenchmark('/test-workspace');
71
+
72
+ expect(result.stages).toHaveLength(1);
73
+ expect(result.stages[0].name).toBe('01-ideation');
74
+ });
75
+
76
+ it('normalizes final score to 0-100', () => {
77
+ mockFs.existsSync.mockReturnValue(true);
78
+ mockFs.readdirSync.mockReturnValue([
79
+ { name: '00-meta', isDirectory: () => true },
80
+ { name: '01-ideation', isDirectory: () => true },
81
+ ] as fs.Dirent[]);
82
+ mockFs.readFileSync.mockReturnValue('purpose: test\ninput: none\noutput: test\ndependencies: none');
83
+
84
+ const result = calculateBenchmark('/test-workspace');
85
+
86
+ expect(result.weightedScore).toBeGreaterThanOrEqual(0);
87
+ expect(result.weightedScore).toBeLessThanOrEqual(100);
88
+ });
89
+
90
+ it('returns empty stages for workspace with no numbered folders', () => {
91
+ mockFs.existsSync.mockReturnValue(false);
92
+ mockFs.readdirSync.mockReturnValue([
93
+ { name: '00-meta', isDirectory: () => true },
94
+ ] as fs.Dirent[]);
95
+
96
+ const result = calculateBenchmark('/test-workspace');
97
+
98
+ expect(result.stages).toHaveLength(0);
99
+ expect(result.weightedScore).toBe(0);
100
+ });
101
+ });
102
+
103
+ describe('formatBenchmarkTable', () => {
104
+ it('formats a benchmark result as a console table', () => {
105
+ const data: BenchmarkResult = {
106
+ workspace: 'test-ws',
107
+ agent: 'opencode',
108
+ timestamp: '2026-04-07T00:00:00Z',
109
+ rawScore: 72,
110
+ weightedScore: 78,
111
+ stages: [
112
+ { name: '01-ideation', raw: 85, weight: 1.5, weighted: 95 },
113
+ { name: '02-research', raw: 60, weight: 1.3, weighted: 58 },
114
+ ],
115
+ fixSuggestions: ['Add research sources'],
116
+ improvementPotential: true,
117
+ };
118
+
119
+ const table = formatBenchmarkTable(data);
120
+
121
+ expect(table).toContain('01-ideation');
122
+ expect(table).toContain('02-research');
123
+ expect(table).toContain('78');
124
+ expect(table).toContain('TOTAL');
125
+ });
126
+
127
+ it('handles empty stages gracefully', () => {
128
+ const data: BenchmarkResult = {
129
+ workspace: 'test-ws',
130
+ agent: 'opencode',
131
+ timestamp: '2026-04-07T00:00:00Z',
132
+ rawScore: 0,
133
+ weightedScore: 0,
134
+ stages: [],
135
+ fixSuggestions: [],
136
+ improvementPotential: false,
137
+ };
138
+
139
+ const table = formatBenchmarkTable(data);
140
+
141
+ expect(table).toContain('0');
142
+ });
143
+ });
144
+ ```
145
+
146
+ - [ ] **Step 2: Run tests to verify they fail**
147
+
148
+ Run: `npm test -- tests/benchmark.test.ts`
149
+ Expected: FAIL with a "Cannot find module" error for the benchmark import, because `src/scripts/benchmark.ts` does not exist yet
150
+
151
+ - [ ] **Step 3: Implement `src/scripts/benchmark.ts`**
152
+
153
+ ```typescript
154
+ import * as fs from 'fs';
155
+ import * as path from 'path';
156
+
157
+ export interface StageBenchmark {
158
+ name: string;
159
+ raw: number;
160
+ weight: number;
161
+ weighted: number;
162
+ }
163
+
164
+ export interface BenchmarkResult {
165
+ workspace: string;
166
+ agent: string;
167
+ timestamp: string;
168
+ rawScore: number;
169
+ weightedScore: number;
170
+ stages: StageBenchmark[];
171
+ fixSuggestions: string[];
172
+ improvementPotential: boolean;
173
+ }
174
+
175
+ const STAGE_WEIGHTS: Record<string, number> = {
176
+ '01-ideation': 1.5,
177
+ '02-research': 1.3,
178
+ '03-architecture': 1.2,
179
+ };
180
+
181
+ const DEFAULT_WEIGHT = 1.0;
182
+ const MAX_RAW_SCORE = 45; // Per-stage cap from validate.ts
183
+
184
+ export function calculateBenchmark(workspacePath: string): BenchmarkResult {
185
+ const ws = path.resolve(workspacePath);
186
+ const stageFolders = getNumberedFolders(ws);
187
+
188
+ const stages: StageBenchmark[] = [];
189
+ let totalWeighted = 0;
190
+ let totalWeight = 0;
191
+
192
+ for (const folder of stageFolders) {
193
+ const weight = STAGE_WEIGHTS[folder] ?? DEFAULT_WEIGHT;
194
+ const raw = calculateStageRawScore(ws, folder);
195
+ const weighted = (raw / MAX_RAW_SCORE) * 100 * weight;
196
+
197
+ stages.push({ name: folder, raw, weight, weighted });
198
+ totalWeighted += weighted;
199
+ totalWeight += weight;
200
+ }
201
+
202
+ const weightedScore = totalWeight > 0 ? totalWeighted / totalWeight : 0;
203
+ const rawScore = stages.reduce((sum, s) => sum + s.raw, 0);
204
+
205
+ const fixSuggestions = stages
206
+ .filter((s) => s.raw < MAX_RAW_SCORE)
207
+ .map((s) => `Improve ${s.name}: current score ${s.raw}/${MAX_RAW_SCORE}`);
208
+
209
+ return {
210
+ workspace: path.basename(ws),
211
+ agent: 'unknown',
212
+ timestamp: new Date().toISOString(),
213
+ rawScore,
214
+ weightedScore: Math.min(Math.round(weightedScore), 100),
215
+ stages,
216
+ fixSuggestions,
217
+ improvementPotential: stages.some((s) => s.raw < MAX_RAW_SCORE),
218
+ };
219
+ }
220
+
221
+ export function formatBenchmarkTable(data: BenchmarkResult): string {
222
+ const lines: string[] = [];
223
+
224
+ lines.push(`\nBenchmark Report: ${data.workspace}`);
225
+ lines.push(`Agent: ${data.agent} | Timestamp: ${data.timestamp}`);
226
+ lines.push('');
227
+ lines.push(
228
+ padRight('Stage', 20) +
229
+ padRight('Raw', 8) +
230
+ padRight('Weight', 10) +
231
+ padRight('Weighted', 12)
232
+ );
233
+ lines.push('-'.repeat(50));
234
+
235
+ for (const stage of data.stages) {
236
+ lines.push(
237
+ padRight(stage.name, 20) +
238
+ padRight(String(stage.raw), 8) +
239
+ padRight(stage.weight.toFixed(1) + 'x', 10) +
240
+ padRight(stage.weighted.toFixed(1), 12)
241
+ );
242
+ }
243
+
244
+ lines.push('-'.repeat(50));
245
+ lines.push(
246
+ padRight('TOTAL', 20) +
247
+ padRight(String(data.rawScore), 8) +
248
+ padRight('', 10) +
249
+ padRight(data.weightedScore.toFixed(1), 12)
250
+ );
251
+ lines.push('');
252
+
253
+ if (data.fixSuggestions.length > 0) {
254
+ lines.push('Suggestions:');
255
+ for (const suggestion of data.fixSuggestions) {
256
+ lines.push(` - ${suggestion}`);
257
+ }
258
+ lines.push('');
259
+ }
260
+
261
+ return lines.join('\n');
262
+ }
263
+
264
+ function calculateStageRawScore(ws: string, folder: string): number {
265
+ const stageContextPath = path.join(ws, folder, 'CONTEXT.md');
266
+ let score = 0;
267
+
268
+ if (fs.existsSync(stageContextPath)) {
269
+ const content = fs.readFileSync(stageContextPath, 'utf-8');
270
+ if (content.toLowerCase().includes('purpose') || content.toLowerCase().includes('## purpose')) score += 4;
271
+ if (content.toLowerCase().includes('input')) score += 4;
272
+ if (content.toLowerCase().includes('output')) score += 4;
273
+ if (content.toLowerCase().includes('dependenc')) score += 3;
274
+ // Additional checks for more granular scoring
275
+ if (content.toLowerCase().includes('## success criteria') || content.toLowerCase().includes('success criteria')) score += 5;
276
+ if (content.toLowerCase().includes('## approach') || content.toLowerCase().includes('approach')) score += 5;
277
+ if (content.toLowerCase().includes('## risks') || content.toLowerCase().includes('risks')) score += 5;
278
+ if (content.toLowerCase().includes('## timeline') || content.toLowerCase().includes('timeline')) score += 5;
279
+ if (content.toLowerCase().includes('## resources') || content.toLowerCase().includes('resources')) score += 5;
280
+ if (content.toLowerCase().includes('## validation') || content.toLowerCase().includes('validation')) score += 5;
281
+ }
282
+
283
+ return Math.min(score, MAX_RAW_SCORE);
284
+ }
285
+
286
+ function getNumberedFolders(workspacePath: string): string[] {
287
+ if (!fs.existsSync(workspacePath)) return [];
288
+ const entries = fs.readdirSync(workspacePath, { withFileTypes: true });
289
+ return entries
290
+ .filter((e) => e.isDirectory() && /^\d{2}-/.test(e.name) && e.name !== '00-meta')
291
+ .map((e) => e.name);
292
+ }
293
+
294
+ function padRight(str: string, length: number): string {
295
+ return str.padEnd(length);
296
+ }
297
+
298
+ if (require.main === module) {
299
+ const args = process.argv.slice(2);
300
+ const workspace = args.find((_, i) => i > 0 && args[i - 1] === '--workspace') || args[0];
301
+
302
+ if (!workspace) {
303
+ console.error('Usage: node benchmark.ts --workspace <path>');
304
+ process.exit(1);
305
+ }
306
+
307
+ const result = calculateBenchmark(workspace);
308
+ console.log(formatBenchmarkTable(result));
309
+ }
310
+ ```
311
+
312
+ - [ ] **Step 4: Run tests to verify they pass**
313
+
314
+ Run: `npm test -- tests/benchmark.test.ts`
315
+ Expected: All 6 tests pass
316
+
317
+ - [ ] **Step 5: Commit**
318
+
319
+ ```bash
320
+ git add src/scripts/benchmark.ts tests/benchmark.test.ts
321
+ git commit -m "feat: add weighted benchmark scoring engine"
322
+ ```
323
+
324
+ ---
325
+
326
+ ### Task 2: Add `saveBenchmarkReport` Function
327
+
328
+ **Files:**
329
+ - Modify: `src/scripts/benchmark.ts`
330
+ - Test: `tests/benchmark.test.ts`
331
+
332
+ - [ ] **Step 1: Write tests for saveBenchmarkReport**
333
+
334
+ ```typescript
335
+ // Add to tests/benchmark.test.ts
336
+ import { saveBenchmarkReport } from '../src/scripts/benchmark';
337
+
338
+ describe('saveBenchmarkReport', () => {
339
+ it('saves benchmark report to .workspace-benchmarks directory', () => {
340
+ const data: BenchmarkResult = {
341
+ workspace: 'test-ws',
342
+ agent: 'opencode',
343
+ timestamp: '2026-04-07T00:00:00Z',
344
+ rawScore: 72,
345
+ weightedScore: 78,
346
+ stages: [],
347
+ fixSuggestions: [],
348
+ improvementPotential: false,
349
+ };
350
+
351
+ saveBenchmarkReport('/test-workspace', data);
352
+
353
+ expect(mockFs.mkdirSync).toHaveBeenCalledWith(
354
+ expect.stringContaining('.workspace-benchmarks'),
355
+ { recursive: true }
356
+ );
357
+ expect(mockFs.writeFileSync).toHaveBeenCalledWith(
358
+ expect.stringContaining('test-ws-'),
359
+ expect.stringContaining('"weightedScore": 78'),
360
+ 'utf-8'
361
+ );
362
+ });
363
+ });
364
+ ```
365
+
366
+ - [ ] **Step 2: Run test to verify it fails**
367
+
368
+ Run: `npm test -- tests/benchmark.test.ts -t "saveBenchmarkReport"`
369
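+ Expected: FAIL because `saveBenchmarkReport` is not exported from `benchmark.ts` yet (a TypeScript "has no exported member" error, or "saveBenchmarkReport is not a function" at runtime)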
370
+
371
+ - [ ] **Step 3: Implement saveBenchmarkReport**
372
+
373
+ Add to `src/scripts/benchmark.ts`:
374
+
375
+ ```typescript
376
+ export function saveBenchmarkReport(workspacePath: string, data: BenchmarkResult): string {
377
+ const reportDir = path.join(workspacePath, '.workspace-benchmarks');
378
+ fs.mkdirSync(reportDir, { recursive: true });
379
+
380
+ const filename = `${data.workspace}-${data.timestamp.replace(/[:.]/g, '-')}.json`;
381
+ const filePath = path.join(reportDir, filename);
382
+
383
+ fs.writeFileSync(filePath, JSON.stringify(data, null, 2), 'utf-8');
384
+ return filePath;
385
+ }
386
+ ```
387
+
388
+ - [ ] **Step 4: Run tests to verify they pass**
389
+
390
+ Run: `npm test -- tests/benchmark.test.ts`
391
+ Expected: All 7 tests pass
392
+
393
+ - [ ] **Step 5: Commit**
394
+
395
+ ```bash
396
+ git add src/scripts/benchmark.ts tests/benchmark.test.ts
397
+ git commit -m "feat: add saveBenchmarkReport function"
398
+ ```
399
+
400
+ ---
401
+
402
+ ### Task 3: Enhance `src/install.ts` with Multi-Agent Targeting
403
+
404
+ **Files:**
405
+ - Modify: `src/install.ts`
406
+ - Test: `tests/install.test.ts`
407
+
408
+ - [ ] **Step 1: Write tests for agent-targeted installation**
409
+
410
+ ```typescript
411
+ // Add to tests/install.test.ts
412
+ import { getAgentTargetPath } from '../src/install';
413
+
414
+ describe('getAgentTargetPath', () => {
415
+ it('returns default path for no agent', () => {
416
+ const result = getAgentTargetPath('/project-root', undefined);
417
+ expect(result).toBe('/project-root/.agents/skills/workspace-maxxing');
418
+ });
419
+
420
+ it('returns opencode path for --opencode flag', () => {
421
+ const result = getAgentTargetPath('/project-root', 'opencode');
422
+ expect(result).toBe('/project-root/.agents/skills/workspace-maxxing');
423
+ });
424
+
425
+ it('returns claude path for --claude flag', () => {
426
+ const result = getAgentTargetPath('/project-root', 'claude');
427
+ expect(result).toBe('/project-root/.claude/skills');
428
+ });
429
+
430
+ it('returns copilot path for --copilot flag', () => {
431
+ const result = getAgentTargetPath('/project-root', 'copilot');
432
+ expect(result).toBe('/project-root/.github/copilot-instructions');
433
+ });
434
+
435
+ it('returns gemini path for --gemini flag', () => {
436
+ const result = getAgentTargetPath('/project-root', 'gemini');
437
+ expect(result).toBe('/project-root/.gemini/skills');
438
+ });
439
+ });
440
+ ```
441
+
442
+ - [ ] **Step 2: Run tests to verify they fail**
443
+
444
+ Run: `npm test -- tests/install.test.ts -t "getAgentTargetPath"`
445
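+ Expected: FAIL because `getAgentTargetPath` is not exported from `install.ts` yet (a TypeScript "has no exported member" error, or "getAgentTargetPath is not a function" at runtime)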
446
+
447
+ - [ ] **Step 3: Implement agent targeting in install.ts**
448
+
449
+ Add to `src/install.ts`:
450
+
451
+ ```typescript
452
+ export type AgentTarget = 'opencode' | 'claude' | 'copilot' | 'gemini' | undefined;
453
+
454
+ const AGENT_PATHS: Record<string, string> = {
455
+ opencode: '.agents/skills/workspace-maxxing',
456
+ claude: '.claude/skills',
457
+ copilot: '.github/copilot-instructions',
458
+ gemini: '.gemini/skills',
459
+ };
460
+
461
+ export function getAgentTargetPath(projectRoot: string, agent: AgentTarget): string {
462
+ const relativePath = AGENT_PATHS[agent ?? 'opencode'];
463
+ return path.join(projectRoot, relativePath);
464
+ }
465
+ ```
466
+
467
+ Modify `installSkill` to accept agent parameter:
468
+
469
+ ```typescript
470
+ export async function installSkill(
471
+ projectRoot: string,
472
+ templatesDir: string,
473
+ agent: AgentTarget = undefined,
474
+ ): Promise<InstallResult> {
475
+ const skillDir = getAgentTargetPath(projectRoot, agent);
476
+ // ... rest of existing implementation unchanged
477
+ }
478
+ ```
479
+
480
+ - [ ] **Step 4: Run tests to verify they pass**
481
+
482
+ Run: `npm test -- tests/install.test.ts`
483
+ Expected: All existing tests + 5 new tests pass
484
+
485
+ - [ ] **Step 5: Commit**
486
+
487
+ ```bash
488
+ git add src/install.ts tests/install.test.ts
489
+ git commit -m "feat: add multi-agent installation targeting"
490
+ ```
491
+
492
+ ---
493
+
494
+ ### Task 4: Update `src/index.ts` with New CLI Flags
495
+
496
+ **Files:**
497
+ - Modify: `src/index.ts`
498
+ - Test: `tests/cli.test.ts`
499
+
500
+ - [ ] **Step 1: Write tests for new CLI flags**
501
+
502
+ ```typescript
503
+ // Add to tests/cli.test.ts
504
+ describe('CLI flags', () => {
505
+ it('accepts --claude flag', () => {
506
+ const stdout = execSync('node dist/index.js --claude', {
506
+ env: { ...process.env, WORKSPACE_MAXXING_TEMPLATES: templatesDir }, encoding: 'utf-8',
508
+ });
509
+ expect(stdout).toContain('Skill installed to');
510
+ expect(stdout).toContain('.claude/skills');
511
+ });
512
+
513
+ it('accepts --copilot flag', () => {
514
+ const stdout = execSync('node dist/index.js --copilot', {
514
+ env: { ...process.env, WORKSPACE_MAXXING_TEMPLATES: templatesDir }, encoding: 'utf-8',
516
+ });
517
+ expect(stdout).toContain('.github/copilot-instructions');
518
+ });
519
+
520
+ it('accepts --gemini flag', () => {
521
+ const stdout = execSync('node dist/index.js --gemini', {
521
+ env: { ...process.env, WORKSPACE_MAXXING_TEMPLATES: templatesDir }, encoding: 'utf-8',
523
+ });
524
+ expect(stdout).toContain('.gemini/skills');
525
+ });
526
+ });
527
+ ```
528
+
529
+ - [ ] **Step 2: Run tests to verify they fail**
530
+
531
+ Run: `npm test -- tests/cli.test.ts -t "CLI flags"`
532
+ Expected: FAIL (flags not recognized yet)
533
+
534
+ - [ ] **Step 3: Update CLI flag handling**
535
+
536
+ Modify `src/index.ts`:
537
+
538
+ ```typescript
539
+ import { detectProjectRoot, installSkill, AgentTarget } from './install';
540
+
541
+ function showHelp(): void {
542
+ console.log(`
543
+ workspace-maxxing — npx-installable skill for AI agents
544
+
545
+ Usage:
546
+ npx workspace-maxxing [options]
547
+
548
+ Options:
549
+ --opencode Install skill for OpenCode agents (default)
550
+ --claude Install skill for Claude Code agents
551
+ --copilot Install skill for GitHub Copilot agents
552
+ --gemini Install skill for Gemini CLI agents
553
+ --help Show this help message
554
+
555
+ Examples:
556
+ npx workspace-maxxing --opencode
557
+ npx workspace-maxxing --claude
558
+ npx workspace-maxxing --copilot
559
+ npx workspace-maxxing --gemini
560
+ `);
561
+ }
562
+
563
+ async function main(): Promise<void> {
564
+ const args = process.argv.slice(2);
565
+
566
+ if (args.length === 0 || args.includes('--help')) {
567
+ showHelp();
568
+ process.exit(0);
569
+ }
570
+
571
+ const agentFlags: AgentTarget[] = ['opencode', 'claude', 'copilot', 'gemini'];
572
+ const detectedAgent = agentFlags.find((flag) => args.includes(`--${flag}`));
573
+
574
+ if (detectedAgent) {
575
+ const cwd = process.cwd();
576
+ const projectRoot = detectProjectRoot(cwd);
577
+
578
+ if (projectRoot !== cwd) {
579
+ console.log(`Detected project root: ${projectRoot}`);
580
+ }
581
+
582
+ const templatesDir =
583
+ process.env.WORKSPACE_MAXXING_TEMPLATES ??
584
+ path.join(__dirname, '..', 'templates');
585
+
586
+ console.log(`Installing workspace-maxxing skill for ${detectedAgent}...`);
587
+ const result = await installSkill(projectRoot, templatesDir, detectedAgent);
588
+
589
+ if (result.success) {
590
+ console.log(`Skill installed to: ${result.skillPath}`);
591
+ console.log(`Open a new ${detectedAgent} session and invoke the workspace-maxxing skill to get started.`);
592
+ } else {
593
+ console.error(`Installation failed: ${result.error}`);
594
+ process.exit(1);
595
+ }
596
+
597
+ return;
598
+ }
599
+
600
+ console.error(`Unknown flag: ${args.find((a) => a.startsWith('--')) ?? args[0]}`);
601
+ console.error('Run "npx workspace-maxxing --help" for usage.');
602
+ process.exit(1);
603
+ }
604
+ ```
605
+
606
+ - [ ] **Step 4: Build and run tests**
607
+
608
+ Run: `npm run build && npm test -- tests/cli.test.ts`
609
+ Expected: All CLI tests pass
610
+
611
+ - [ ] **Step 5: Commit**
612
+
613
+ ```bash
614
+ git add src/index.ts tests/cli.test.ts
615
+ git commit -m "feat: add --claude, --copilot, --gemini CLI flags"
616
+ ```
617
+
618
+ ---
619
+
620
+ ### Task 5: Extend `src/scripts/iterate.ts` with Benchmark Data
621
+
622
+ **Files:**
623
+ - Modify: `src/scripts/iterate.ts`
624
+ - Test: `tests/iterate.test.ts`
625
+
626
+ - [ ] **Step 1: Write tests for benchmark integration**
627
+
628
+ ```typescript
629
+ // Add to tests/iterate.test.ts
630
+ import { iterateWorkspace } from '../src/scripts/iterate';
630
+ import * as benchmark from '../src/scripts/benchmark';
631
+
632
+ jest.mock('../src/scripts/benchmark');
634
+
635
+ describe('iterateWorkspace with benchmark', () => {
636
+ it('includes benchmark data in result', () => {
637
+ (benchmark.calculateBenchmark as jest.Mock).mockReturnValue({
638
+ workspace: 'test-ws',
639
+ agent: 'opencode',
640
+ timestamp: '2026-04-07T00:00:00Z',
641
+ rawScore: 72,
642
+ weightedScore: 78,
643
+ stages: [{ name: '01-ideation', raw: 85, weight: 1.5, weighted: 95 }],
644
+ fixSuggestions: ['Add research sources'],
645
+ improvementPotential: true,
646
+ });
647
+
648
+ const result = iterateWorkspace('/test-workspace');
649
+
650
+ expect(result.benchmark).toBeDefined();
651
+ expect(result.benchmark?.weightedScore).toBe(78);
652
+ expect(result.benchmark?.improvementPotential).toBe(true);
653
+ });
654
+
655
+ it('passes agent flag to benchmark result', () => {
656
+ (benchmark.calculateBenchmark as jest.Mock).mockReturnValue({
657
+ workspace: 'test-ws',
658
+ agent: 'claude',
659
+ timestamp: '2026-04-07T00:00:00Z',
660
+ rawScore: 72,
661
+ weightedScore: 78,
662
+ stages: [],
663
+ fixSuggestions: [],
664
+ improvementPotential: false,
665
+ });
666
+
667
+ const result = iterateWorkspace('/test-workspace', { agent: 'claude' });
668
+
669
+ expect(result.benchmark?.agent).toBe('claude');
670
+ });
671
+ });
672
+ ```
673
+
674
+ - [ ] **Step 2: Run tests to verify they fail**
675
+
676
+ Run: `npm test -- tests/iterate.test.ts -t "benchmark"`
677
+ Expected: FAIL (benchmark integration not implemented)
678
+
679
+ - [ ] **Step 3: Update IterateResult interface and iterateWorkspace function**
680
+
681
+ Modify `src/scripts/iterate.ts`:
682
+
683
+ ```typescript
684
+ import { calculateBenchmark, BenchmarkResult } from './benchmark';
685
+
686
+ export interface IterateOptions {
687
+ maxRetries?: number;
688
+ agent?: string;
689
+ }
690
+
691
+ export interface IterateResult {
692
+ passes: {
693
+ validate: ValidatePassResult;
694
+ score: ScorePassResult;
695
+ checklist: ChecklistResult;
696
+ };
697
+ benchmark?: BenchmarkResult;
698
+ escalate: boolean;
699
+ }
700
+ ```
701
+
702
+ Update `iterateWorkspace` function:
703
+
704
+ ```typescript
705
+ export function iterateWorkspace(
706
+ workspacePath: string,
707
+ options: IterateOptions = {},
708
+ ): IterateResult {
709
+ const { maxRetries = 3, agent = 'unknown' } = options;
710
+ const ws = path.resolve(workspacePath);
711
+
712
+ const validateResult = runValidatePass(ws, maxRetries);
713
+ const scoreResult = runScorePass(ws);
714
+ const checklistResult = runChecklist(ws);
715
+ const benchmarkResult = calculateBenchmark(ws);
716
+ benchmarkResult.agent = agent;
717
+
718
+ const result: IterateResult = {
719
+ passes: {
720
+ validate: validateResult,
721
+ score: scoreResult,
722
+ checklist: checklistResult,
723
+ },
724
+ benchmark: benchmarkResult,
725
+ escalate: validateResult.status === 'escalated',
726
+ };
727
+
728
+ console.log(JSON.stringify(result, null, 2));
729
+
730
+ return result;
731
+ }
732
+ ```
733
+
734
+ - [ ] **Step 4: Run tests to verify they pass**
735
+
736
+ Run: `npm test -- tests/iterate.test.ts`
737
+ Expected: All iterate tests pass
738
+
739
+ - [ ] **Step 5: Commit**
740
+
741
+ ```bash
742
+ git add src/scripts/iterate.ts tests/iterate.test.ts
743
+ git commit -m "feat: integrate benchmark data into iterate results"
744
+ ```
745
+
746
+ ---
747
+
748
+ ### Task 6: Copy benchmark.ts to Templates
749
+
750
+ **Files:**
751
+ - Create: `templates/.workspace-templates/scripts/benchmark.ts` (copied from `src/scripts/benchmark.ts`)
751
+ - Verify (no change expected): `src/install.ts` (confirm benchmark.ts is copied during install)
753
+
754
+ - [ ] **Step 1: Copy benchmark.ts to templates**
755
+
756
+ ```bash
757
+ cp src/scripts/benchmark.ts templates/.workspace-templates/scripts/
758
+ ```
759
+
760
+ - [ ] **Step 2: Verify installer copies benchmark.ts**
761
+
762
+ The installer already copies everything from `templates/.workspace-templates/scripts/` to the skill directory. Verify this by checking `src/install.ts` lines 79-83: the entire scripts directory is copied recursively, so benchmark.ts is included automatically and no installer change is needed.
763
+
764
+ - [ ] **Step 3: Run full test suite**
765
+
766
+ Run: `npm test`
767
+ Expected: All tests pass (baseline 75 + new tests)
768
+
769
+ - [ ] **Step 4: Commit**
770
+
771
+ ```bash
772
+ git add templates/.workspace-templates/scripts/benchmark.ts
773
+ git commit -m "feat: include benchmark script in templates"
774
+ ```
775
+
776
+ ---
777
+
778
+ ### Task 7: Update Templates SKILL.md
779
+
780
+ **Files:**
781
+ - Modify: `templates/SKILL.md`
782
+
783
+ - [ ] **Step 1: Add benchmark section to SKILL.md**
784
+
785
+ Add to `templates/SKILL.md` after the "Autonomous Iteration" section:
786
+
787
+ ```markdown
788
+ ## Benchmarking
789
+
790
+ Run benchmarks to assess workspace quality with weighted scoring:
791
+
792
+ \`\`\`bash
793
+ node .agents/skills/workspace-maxxing/scripts/benchmark.ts --workspace <workspace-path>
794
+ \`\`\`
795
+
796
+ **Weights:**
797
+ - `01-ideation`: 1.5x (core thinking quality)
798
+ - `02-research`: 1.3x (evidence gathering)
799
+ - `03-architecture`: 1.2x (structural decisions)
800
+ - All other stages: 1.0x
801
+
802
+ **Output:**
803
+ - Console: Formatted table with stage scores and suggestions
804
+ - JSON: Saved to `.workspace-benchmarks/<workspace>-<timestamp>.json`
805
+
806
+ **Integration with Iteration:**
807
+ The `iterate.ts` script now includes benchmark data in its return value. Use the `improvementPotential` field to decide whether to continue iterating.
808
+ ```
809
+
810
+ - [ ] **Step 2: Update Available Scripts section**
811
+
812
+ Add benchmark.ts to the "Available Scripts" table in SKILL.md:
813
+
814
+ ```markdown
815
+ | `benchmark.ts` | Run weighted benchmark scoring on a workspace |
816
+ ```
817
+
818
+ - [ ] **Step 3: Run template tests**
819
+
820
+ Run: `npm test -- tests/templates.test.ts`
821
+ Expected: All template tests pass
822
+
823
+ - [ ] **Step 4: Commit**
824
+
825
+ ```bash
826
+ git add templates/SKILL.md
827
+ git commit -m "docs: add benchmarking section to SKILL.md"
828
+ ```
829
+
830
+ ---
831
+
832
+ ### Task 8: Full Test Suite & Final Commit
833
+
834
+ - [ ] **Step 1: Run full test suite**
835
+
836
+ Run: `npm test`
837
+ Expected: All tests pass (baseline 75 + all new tests from Tasks 1-7)
838
+
839
+ - [ ] **Step 2: Run build**
840
+
841
+ Run: `npm run build`
842
+ Expected: Build succeeds
843
+
844
+ - [ ] **Step 3: Run integration test**
845
+
846
+ Run: `npm test -- tests/integration.test.ts`
847
+ Expected: Integration tests pass
848
+
849
+ - [ ] **Step 4: Final commit**
850
+
851
+ ```bash
852
+ git add .
853
+ git commit -m "feat(sub-project-4): complete benchmarking & multi-agent support"
854
+ ```