workspace-maxxing 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/workspace-maxxing/.workspace-templates/CONTEXT.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/SYSTEM.md +44 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/anti-patterns.md +16 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/iron-laws.md +26 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/references/reporting-format.md +52 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/scripts/validate.ts +452 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/research/SKILL.md +94 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/.agents/skills/workspace-maxxing/.workspace-templates/workspace/README.md +14 -0
- package/.agents/skills/workspace-maxxing/SKILL.md +312 -0
- package/.agents/skills/workspace-maxxing/scripts/benchmark.ts +171 -0
- package/.agents/skills/workspace-maxxing/scripts/dispatch.ts +473 -0
- package/.agents/skills/workspace-maxxing/scripts/generate-tests.ts +158 -0
- package/.agents/skills/workspace-maxxing/scripts/install-tool.ts +82 -0
- package/.agents/skills/workspace-maxxing/scripts/iterate.ts +265 -0
- package/.agents/skills/workspace-maxxing/scripts/orchestrator.ts +539 -0
- package/.agents/skills/workspace-maxxing/scripts/scaffold.ts +282 -0
- package/.agents/skills/workspace-maxxing/scripts/validate.ts +452 -0
- package/README.md +144 -0
- package/dist/agent-creator.d.ts +9 -0
- package/dist/agent-creator.d.ts.map +1 -0
- package/dist/agent-creator.js +199 -0
- package/dist/agent-creator.js.map +1 -0
- package/dist/agent-iterator.d.ts +38 -0
- package/dist/agent-iterator.d.ts.map +1 -0
- package/dist/agent-iterator.js +327 -0
- package/dist/agent-iterator.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +197 -0
- package/dist/index.js.map +1 -0
- package/dist/install.d.ts +18 -0
- package/dist/install.d.ts.map +1 -0
- package/dist/install.js +117 -0
- package/dist/install.js.map +1 -0
- package/dist/platforms/claude.d.ts +7 -0
- package/dist/platforms/claude.d.ts.map +1 -0
- package/dist/platforms/claude.js +70 -0
- package/dist/platforms/claude.js.map +1 -0
- package/dist/platforms/copilot.d.ts +7 -0
- package/dist/platforms/copilot.d.ts.map +1 -0
- package/dist/platforms/copilot.js +75 -0
- package/dist/platforms/copilot.js.map +1 -0
- package/dist/platforms/gemini.d.ts +7 -0
- package/dist/platforms/gemini.d.ts.map +1 -0
- package/dist/platforms/gemini.js +81 -0
- package/dist/platforms/gemini.js.map +1 -0
- package/dist/platforms/index.d.ts +8 -0
- package/dist/platforms/index.d.ts.map +1 -0
- package/dist/platforms/index.js +41 -0
- package/dist/platforms/index.js.map +1 -0
- package/dist/platforms/opencode.d.ts +7 -0
- package/dist/platforms/opencode.d.ts.map +1 -0
- package/dist/platforms/opencode.js +70 -0
- package/dist/platforms/opencode.js.map +1 -0
- package/dist/scripts/benchmark.d.ts +20 -0
- package/dist/scripts/benchmark.d.ts.map +1 -0
- package/dist/scripts/benchmark.js +170 -0
- package/dist/scripts/benchmark.js.map +1 -0
- package/dist/scripts/dispatch.d.ts +32 -0
- package/dist/scripts/dispatch.d.ts.map +1 -0
- package/dist/scripts/dispatch.js +386 -0
- package/dist/scripts/dispatch.js.map +1 -0
- package/dist/scripts/generate-tests.d.ts +11 -0
- package/dist/scripts/generate-tests.d.ts.map +1 -0
- package/dist/scripts/generate-tests.js +118 -0
- package/dist/scripts/generate-tests.js.map +1 -0
- package/dist/scripts/install-tool.d.ts +8 -0
- package/dist/scripts/install-tool.d.ts.map +1 -0
- package/dist/scripts/install-tool.js +98 -0
- package/dist/scripts/install-tool.js.map +1 -0
- package/dist/scripts/iterate.d.ts +44 -0
- package/dist/scripts/iterate.d.ts.map +1 -0
- package/dist/scripts/iterate.js +260 -0
- package/dist/scripts/iterate.js.map +1 -0
- package/dist/scripts/orchestrator.d.ts +40 -0
- package/dist/scripts/orchestrator.d.ts.map +1 -0
- package/dist/scripts/orchestrator.js +378 -0
- package/dist/scripts/orchestrator.js.map +1 -0
- package/dist/scripts/scaffold.d.ts +8 -0
- package/dist/scripts/scaffold.d.ts.map +1 -0
- package/dist/scripts/scaffold.js +279 -0
- package/dist/scripts/scaffold.js.map +1 -0
- package/dist/scripts/validate.d.ts +11 -0
- package/dist/scripts/validate.d.ts.map +1 -0
- package/dist/scripts/validate.js +472 -0
- package/dist/scripts/validate.js.map +1 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-plan.md +1123 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-iteration-sub-agent-batches.md +1923 -0
- package/docs/superpowers/plans/2026-04-07-autonomous-workflow-sub-skill-plan.md +1505 -0
- package/docs/superpowers/plans/2026-04-07-benchmarking-multi-agent-plan.md +854 -0
- package/docs/superpowers/plans/2026-04-07-workspace-builder-logic-plan.md +1426 -0
- package/docs/superpowers/plans/2026-04-07-workspace-maxxing-plan.md +1299 -0
- package/docs/superpowers/plans/2026-04-08-session-294c-subagent-invocation-plan.md +320 -0
- package/docs/superpowers/plans/2026-04-08-workflow-prompt-hardening-plan.md +1025 -0
- package/docs/superpowers/plans/2026-04-12-workspace-agent-creation-plan.md +992 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-design.md +214 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-iteration-sub-agent-batches-design.md +188 -0
- package/docs/superpowers/specs/2026-04-07-autonomous-workflow-sub-skill-design.md +137 -0
- package/docs/superpowers/specs/2026-04-07-benchmarking-multi-agent-design.md +105 -0
- package/docs/superpowers/specs/2026-04-07-workspace-builder-logic-design.md +179 -0
- package/docs/superpowers/specs/2026-04-07-workspace-maxxing-design.md +227 -0
- package/docs/superpowers/specs/2026-04-08-session-294c-subagent-invocation-design.md +265 -0
- package/docs/superpowers/specs/2026-04-08-workflow-prompt-hardening-design.md +146 -0
- package/docs/superpowers/specs/2026-04-12-workspace-agent-creation-design.md +239 -0
- package/jest.config.js +8 -0
- package/package.json +32 -0
- package/src/agent-creator.ts +180 -0
- package/src/agent-iterator.ts +397 -0
- package/src/index.ts +189 -0
- package/src/install.ts +105 -0
- package/src/platforms/claude.ts +40 -0
- package/src/platforms/copilot.ts +50 -0
- package/src/platforms/gemini.ts +55 -0
- package/src/platforms/index.ts +45 -0
- package/src/platforms/opencode.ts +41 -0
- package/src/scripts/benchmark.ts +171 -0
- package/src/scripts/dispatch.ts +473 -0
- package/src/scripts/generate-tests.ts +112 -0
- package/src/scripts/install-tool.ts +82 -0
- package/src/scripts/iterate.ts +271 -0
- package/src/scripts/orchestrator.ts +539 -0
- package/src/scripts/scaffold.ts +282 -0
- package/src/scripts/validate.ts +516 -0
- package/templates/.workspace-templates/CONTEXT.md +44 -0
- package/templates/.workspace-templates/SYSTEM.md +44 -0
- package/templates/.workspace-templates/references/anti-patterns.md +16 -0
- package/templates/.workspace-templates/references/iron-laws.md +26 -0
- package/templates/.workspace-templates/references/reporting-format.md +52 -0
- package/templates/.workspace-templates/scripts/benchmark.ts +171 -0
- package/templates/.workspace-templates/scripts/dispatch.ts +473 -0
- package/templates/.workspace-templates/scripts/generate-tests.ts +158 -0
- package/templates/.workspace-templates/scripts/install-tool.ts +82 -0
- package/templates/.workspace-templates/scripts/iterate.ts +265 -0
- package/templates/.workspace-templates/scripts/orchestrator.ts +539 -0
- package/templates/.workspace-templates/scripts/scaffold.ts +282 -0
- package/templates/.workspace-templates/scripts/validate.ts +452 -0
- package/templates/.workspace-templates/skills/architecture/SKILL.md +95 -0
- package/templates/.workspace-templates/skills/fixer/SKILL.md +109 -0
- package/templates/.workspace-templates/skills/iteration/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/prompt-engineering/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/research/SKILL.md +94 -0
- package/templates/.workspace-templates/skills/testing/SKILL.md +89 -0
- package/templates/.workspace-templates/skills/tooling/SKILL.md +87 -0
- package/templates/.workspace-templates/skills/validation/SKILL.md +103 -0
- package/templates/.workspace-templates/skills/worker/SKILL.md +79 -0
- package/templates/.workspace-templates/workspace/00-meta/CONTEXT.md +6 -0
- package/templates/.workspace-templates/workspace/00-meta/execution-log.md +27 -0
- package/templates/.workspace-templates/workspace/01-input/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/02-process/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/03-output/CONTEXT.md +29 -0
- package/templates/.workspace-templates/workspace/README.md +14 -0
- package/templates/SKILL.md +347 -0
- package/tests/benchmark.test.ts +158 -0
- package/tests/cli.test.ts +109 -0
- package/tests/dispatch-parallel.test.ts +124 -0
- package/tests/dispatch.test.ts +218 -0
- package/tests/fixer-skill.test.ts +203 -0
- package/tests/generate-tests.test.ts +101 -0
- package/tests/install-tool.test.ts +141 -0
- package/tests/install.test.ts +144 -0
- package/tests/integration.test.ts +324 -0
- package/tests/iterate.test.ts +219 -0
- package/tests/orchestrator.test.ts +710 -0
- package/tests/scaffold.test.ts +238 -0
- package/tests/templates-enhanced.test.ts +208 -0
- package/tests/templates.test.ts +219 -0
- package/tests/validate.test.ts +421 -0
- package/tests/validation-enhanced.test.ts +303 -0
- package/tests/worker-skill.test.ts +88 -0
- package/tsconfig.json +19 -0
- package/workspace/00-meta/CONTEXT.md +3 -0
- package/workspace/00-meta/execution-log.md +17 -0
- package/workspace/00-meta/tools.md +11 -0
- package/workspace/01-input/CONTEXT.md +27 -0
- package/workspace/CONTEXT.md +35 -0
- package/workspace/README.md +14 -0
- package/workspace/SYSTEM.md +36 -0
- package/workspace-maxxing-0.1.0.tgz +0 -0
|
@@ -0,0 +1,854 @@
|
|
|
1
|
+
# Benchmarking & Multi-Agent Support Implementation Plan
|
|
2
|
+
|
|
3
|
+
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
|
4
|
+
|
|
5
|
+
**Goal:** Add weighted benchmark scoring, multi-agent CLI installation targeting, guided iteration reports, and console+JSON benchmark output.
|
|
6
|
+
|
|
7
|
+
**Architecture:** New `benchmark.ts` script for weighted scoring, enhanced `install.ts` with agent-targeting flags, extended `iterate.ts` with benchmark data in return values, and updated `index.ts` with new CLI flags.
|
|
8
|
+
|
|
9
|
+
**Tech Stack:** TypeScript, Node.js builtins only (fs, path, process), Jest for testing.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
### Task 1: Create `src/scripts/benchmark.ts` — Weighted Scoring Engine
|
|
14
|
+
|
|
15
|
+
**Files:**
|
|
16
|
+
- Create: `src/scripts/benchmark.ts`
|
|
17
|
+
- Test: `tests/benchmark.test.ts`
|
|
18
|
+
|
|
19
|
+
- [ ] **Step 1: Write tests for weighted scoring**
|
|
20
|
+
|
|
21
|
+
```typescript
|
|
22
|
+
// tests/benchmark.test.ts
|
|
23
|
+
import * as fs from 'fs';
|
|
24
|
+
import * as path from 'path';
|
|
25
|
+
import { calculateBenchmark, formatBenchmarkTable, BenchmarkResult } from '../src/scripts/benchmark';
|
|
26
|
+
|
|
27
|
+
// Mock fs and path
|
|
28
|
+
jest.mock('fs');
|
|
29
|
+
jest.mock('path');
|
|
30
|
+
|
|
31
|
+
const mockFs = fs as jest.Mocked<typeof fs>;
|
|
32
|
+
const mockPath = path as jest.Mocked<typeof path>;
|
|
33
|
+
|
|
34
|
+
describe('calculateBenchmark', () => {
|
|
35
|
+
beforeEach(() => {
|
|
36
|
+
jest.clearAllMocks();
|
|
37
|
+
mockPath.resolve.mockImplementation((p: string) => p);
|
|
38
|
+
mockPath.join.mockImplementation((...args: string[]) => args.join('/'));
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it('returns weighted scores for a workspace with all stages', () => {
|
|
42
|
+
mockFs.existsSync.mockReturnValue(true);
|
|
43
|
+
mockFs.readdirSync.mockReturnValue([
|
|
44
|
+
{ name: '00-meta', isDirectory: () => true },
|
|
45
|
+
{ name: '01-ideation', isDirectory: () => true },
|
|
46
|
+
{ name: '02-research', isDirectory: () => true },
|
|
47
|
+
{ name: '03-architecture', isDirectory: () => true },
|
|
48
|
+
] as fs.Dirent[]);
|
|
49
|
+
mockFs.readFileSync.mockReturnValue('purpose: test\ninput: none\noutput: test\ndependencies: none');
|
|
50
|
+
|
|
51
|
+
const result = calculateBenchmark('/test-workspace');
|
|
52
|
+
|
|
53
|
+
expect(result.stages).toHaveLength(3);
|
|
54
|
+
expect(result.stages[0].name).toBe('01-ideation');
|
|
55
|
+
expect(result.stages[0].weight).toBe(1.5);
|
|
56
|
+
expect(result.stages[1].name).toBe('02-research');
|
|
57
|
+
expect(result.stages[1].weight).toBe(1.3);
|
|
58
|
+
expect(result.stages[2].name).toBe('03-architecture');
|
|
59
|
+
expect(result.stages[2].weight).toBe(1.2);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it('excludes missing stages from calculation', () => {
|
|
63
|
+
mockFs.existsSync.mockReturnValue(true);
|
|
64
|
+
mockFs.readdirSync.mockReturnValue([
|
|
65
|
+
{ name: '00-meta', isDirectory: () => true },
|
|
66
|
+
{ name: '01-ideation', isDirectory: () => true },
|
|
67
|
+
] as fs.Dirent[]);
|
|
68
|
+
mockFs.readFileSync.mockReturnValue('purpose: test\ninput: none\noutput: test\ndependencies: none');
|
|
69
|
+
|
|
70
|
+
const result = calculateBenchmark('/test-workspace');
|
|
71
|
+
|
|
72
|
+
expect(result.stages).toHaveLength(1);
|
|
73
|
+
expect(result.stages[0].name).toBe('01-ideation');
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
it('normalizes final score to 0-100', () => {
|
|
77
|
+
mockFs.existsSync.mockReturnValue(true);
|
|
78
|
+
mockFs.readdirSync.mockReturnValue([
|
|
79
|
+
{ name: '00-meta', isDirectory: () => true },
|
|
80
|
+
{ name: '01-ideation', isDirectory: () => true },
|
|
81
|
+
] as fs.Dirent[]);
|
|
82
|
+
mockFs.readFileSync.mockReturnValue('purpose: test\ninput: none\noutput: test\ndependencies: none');
|
|
83
|
+
|
|
84
|
+
const result = calculateBenchmark('/test-workspace');
|
|
85
|
+
|
|
86
|
+
expect(result.weightedScore).toBeGreaterThanOrEqual(0);
|
|
87
|
+
expect(result.weightedScore).toBeLessThanOrEqual(100);
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
it('returns empty stages for workspace with no numbered folders', () => {
|
|
91
|
+
mockFs.existsSync.mockReturnValue(false);
|
|
92
|
+
mockFs.readdirSync.mockReturnValue([
|
|
93
|
+
{ name: '00-meta', isDirectory: () => true },
|
|
94
|
+
] as fs.Dirent[]);
|
|
95
|
+
|
|
96
|
+
const result = calculateBenchmark('/test-workspace');
|
|
97
|
+
|
|
98
|
+
expect(result.stages).toHaveLength(0);
|
|
99
|
+
expect(result.weightedScore).toBe(0);
|
|
100
|
+
});
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
describe('formatBenchmarkTable', () => {
|
|
104
|
+
it('formats a benchmark result as a console table', () => {
|
|
105
|
+
const data: BenchmarkResult = {
|
|
106
|
+
workspace: 'test-ws',
|
|
107
|
+
agent: 'opencode',
|
|
108
|
+
timestamp: '2026-04-07T00:00:00Z',
|
|
109
|
+
rawScore: 72,
|
|
110
|
+
weightedScore: 78,
|
|
111
|
+
stages: [
|
|
112
|
+
{ name: '01-ideation', raw: 85, weight: 1.5, weighted: 95 },
|
|
113
|
+
{ name: '02-research', raw: 60, weight: 1.3, weighted: 58 },
|
|
114
|
+
],
|
|
115
|
+
fixSuggestions: ['Add research sources'],
|
|
116
|
+
improvementPotential: true,
|
|
117
|
+
};
|
|
118
|
+
|
|
119
|
+
const table = formatBenchmarkTable(data);
|
|
120
|
+
|
|
121
|
+
expect(table).toContain('01-ideation');
|
|
122
|
+
expect(table).toContain('02-research');
|
|
123
|
+
expect(table).toContain('78');
|
|
124
|
+
expect(table).toContain('TOTAL');
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
it('handles empty stages gracefully', () => {
|
|
128
|
+
const data: BenchmarkResult = {
|
|
129
|
+
workspace: 'test-ws',
|
|
130
|
+
agent: 'opencode',
|
|
131
|
+
timestamp: '2026-04-07T00:00:00Z',
|
|
132
|
+
rawScore: 0,
|
|
133
|
+
weightedScore: 0,
|
|
134
|
+
stages: [],
|
|
135
|
+
fixSuggestions: [],
|
|
136
|
+
improvementPotential: false,
|
|
137
|
+
};
|
|
138
|
+
|
|
139
|
+
const table = formatBenchmarkTable(data);
|
|
140
|
+
|
|
141
|
+
expect(table).toContain('0');
|
|
142
|
+
});
|
|
143
|
+
});
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
- [ ] **Step 2: Run tests to verify they fail**
|
|
147
|
+
|
|
148
|
+
Run: `npm test -- tests/benchmark.test.ts`
|
|
149
|
+
Expected: FAIL with a "Cannot find module" error, because `src/scripts/benchmark.ts` does not exist yet
|
|
150
|
+
|
|
151
|
+
- [ ] **Step 3: Implement `src/scripts/benchmark.ts`**
|
|
152
|
+
|
|
153
|
+
```typescript
|
|
154
|
+
import * as fs from 'fs';
|
|
155
|
+
import * as path from 'path';
|
|
156
|
+
|
|
157
|
+
export interface StageBenchmark {
|
|
158
|
+
name: string;
|
|
159
|
+
raw: number;
|
|
160
|
+
weight: number;
|
|
161
|
+
weighted: number;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
export interface BenchmarkResult {
|
|
165
|
+
workspace: string;
|
|
166
|
+
agent: string;
|
|
167
|
+
timestamp: string;
|
|
168
|
+
rawScore: number;
|
|
169
|
+
weightedScore: number;
|
|
170
|
+
stages: StageBenchmark[];
|
|
171
|
+
fixSuggestions: string[];
|
|
172
|
+
improvementPotential: boolean;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
const STAGE_WEIGHTS: Record<string, number> = {
|
|
176
|
+
'01-ideation': 1.5,
|
|
177
|
+
'02-research': 1.3,
|
|
178
|
+
'03-architecture': 1.2,
|
|
179
|
+
};
|
|
180
|
+
|
|
181
|
+
const DEFAULT_WEIGHT = 1.0;
|
|
182
|
+
const MAX_RAW_SCORE = 45; // Per-stage cap from validate.ts
|
|
183
|
+
|
|
184
|
+
export function calculateBenchmark(workspacePath: string): BenchmarkResult {
|
|
185
|
+
const ws = path.resolve(workspacePath);
|
|
186
|
+
const stageFolders = getNumberedFolders(ws);
|
|
187
|
+
|
|
188
|
+
const stages: StageBenchmark[] = [];
|
|
189
|
+
let totalWeighted = 0;
|
|
190
|
+
let totalWeight = 0;
|
|
191
|
+
|
|
192
|
+
for (const folder of stageFolders) {
|
|
193
|
+
const weight = STAGE_WEIGHTS[folder] ?? DEFAULT_WEIGHT;
|
|
194
|
+
const raw = calculateStageRawScore(ws, folder);
|
|
195
|
+
const weighted = (raw / MAX_RAW_SCORE) * 100 * weight;
|
|
196
|
+
|
|
197
|
+
stages.push({ name: folder, raw, weight, weighted });
|
|
198
|
+
totalWeighted += weighted;
|
|
199
|
+
totalWeight += weight;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
const weightedScore = totalWeight > 0 ? totalWeighted / totalWeight : 0;
|
|
203
|
+
const rawScore = stages.reduce((sum, s) => sum + s.raw, 0);
|
|
204
|
+
|
|
205
|
+
const fixSuggestions = stages
|
|
206
|
+
.filter((s) => s.raw < MAX_RAW_SCORE)
|
|
207
|
+
.map((s) => `Improve ${s.name}: current score ${s.raw}/${MAX_RAW_SCORE}`);
|
|
208
|
+
|
|
209
|
+
return {
|
|
210
|
+
workspace: path.basename(ws),
|
|
211
|
+
agent: 'unknown',
|
|
212
|
+
timestamp: new Date().toISOString(),
|
|
213
|
+
rawScore,
|
|
214
|
+
weightedScore: Math.min(Math.round(weightedScore), 100),
|
|
215
|
+
stages,
|
|
216
|
+
fixSuggestions,
|
|
217
|
+
improvementPotential: stages.some((s) => s.raw < MAX_RAW_SCORE),
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
export function formatBenchmarkTable(data: BenchmarkResult): string {
|
|
222
|
+
const lines: string[] = [];
|
|
223
|
+
|
|
224
|
+
lines.push(`\nBenchmark Report: ${data.workspace}`);
|
|
225
|
+
lines.push(`Agent: ${data.agent} | Timestamp: ${data.timestamp}`);
|
|
226
|
+
lines.push('');
|
|
227
|
+
lines.push(
|
|
228
|
+
padRight('Stage', 20) +
|
|
229
|
+
padRight('Raw', 8) +
|
|
230
|
+
padRight('Weight', 10) +
|
|
231
|
+
padRight('Weighted', 12)
|
|
232
|
+
);
|
|
233
|
+
lines.push('-'.repeat(50));
|
|
234
|
+
|
|
235
|
+
for (const stage of data.stages) {
|
|
236
|
+
lines.push(
|
|
237
|
+
padRight(stage.name, 20) +
|
|
238
|
+
padRight(String(stage.raw), 8) +
|
|
239
|
+
padRight(stage.weight.toFixed(1) + 'x', 10) +
|
|
240
|
+
padRight(stage.weighted.toFixed(1), 12)
|
|
241
|
+
);
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
lines.push('-'.repeat(50));
|
|
245
|
+
lines.push(
|
|
246
|
+
padRight('TOTAL', 20) +
|
|
247
|
+
padRight(String(data.rawScore), 8) +
|
|
248
|
+
padRight('', 10) +
|
|
249
|
+
padRight(data.weightedScore.toFixed(1), 12)
|
|
250
|
+
);
|
|
251
|
+
lines.push('');
|
|
252
|
+
|
|
253
|
+
if (data.fixSuggestions.length > 0) {
|
|
254
|
+
lines.push('Suggestions:');
|
|
255
|
+
for (const suggestion of data.fixSuggestions) {
|
|
256
|
+
lines.push(` - ${suggestion}`);
|
|
257
|
+
}
|
|
258
|
+
lines.push('');
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
return lines.join('\n');
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
function calculateStageRawScore(ws: string, folder: string): number {
|
|
265
|
+
const stageContextPath = path.join(ws, folder, 'CONTEXT.md');
|
|
266
|
+
let score = 0;
|
|
267
|
+
|
|
268
|
+
if (fs.existsSync(stageContextPath)) {
|
|
269
|
+
const content = fs.readFileSync(stageContextPath, 'utf-8');
|
|
270
|
+
if (content.toLowerCase().includes('purpose') || content.toLowerCase().includes('## purpose')) score += 4;
|
|
271
|
+
if (content.toLowerCase().includes('input')) score += 4;
|
|
272
|
+
if (content.toLowerCase().includes('output')) score += 4;
|
|
273
|
+
if (content.toLowerCase().includes('dependenc')) score += 3;
|
|
274
|
+
// Additional checks for more granular scoring
|
|
275
|
+
if (content.toLowerCase().includes('## success criteria') || content.toLowerCase().includes('success criteria')) score += 5;
|
|
276
|
+
if (content.toLowerCase().includes('## approach') || content.toLowerCase().includes('approach')) score += 5;
|
|
277
|
+
if (content.toLowerCase().includes('## risks') || content.toLowerCase().includes('risks')) score += 5;
|
|
278
|
+
if (content.toLowerCase().includes('## timeline') || content.toLowerCase().includes('timeline')) score += 5;
|
|
279
|
+
if (content.toLowerCase().includes('## resources') || content.toLowerCase().includes('resources')) score += 5;
|
|
280
|
+
if (content.toLowerCase().includes('## validation') || content.toLowerCase().includes('validation')) score += 5;
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
return Math.min(score, MAX_RAW_SCORE);
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
function getNumberedFolders(workspacePath: string): string[] {
|
|
287
|
+
if (!fs.existsSync(workspacePath)) return [];
|
|
288
|
+
const entries = fs.readdirSync(workspacePath, { withFileTypes: true });
|
|
289
|
+
return entries
|
|
290
|
+
.filter((e) => e.isDirectory() && /^\d{2}-/.test(e.name) && e.name !== '00-meta')
|
|
291
|
+
.map((e) => e.name);
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
function padRight(str: string, length: number): string {
|
|
295
|
+
return str.padEnd(length);
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
if (require.main === module) {
|
|
299
|
+
const args = process.argv.slice(2);
|
|
300
|
+
const workspace = args.find((_, i) => i > 0 && args[i - 1] === '--workspace') || args[0];
|
|
301
|
+
|
|
302
|
+
if (!workspace) {
|
|
303
|
+
console.error('Usage: node benchmark.ts --workspace <path>');
|
|
304
|
+
process.exit(1);
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
const result = calculateBenchmark(workspace);
|
|
308
|
+
console.log(formatBenchmarkTable(result));
|
|
309
|
+
}
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
- [ ] **Step 4: Run tests to verify they pass**
|
|
313
|
+
|
|
314
|
+
Run: `npm test -- tests/benchmark.test.ts`
|
|
315
|
+
Expected: All 6 tests pass
|
|
316
|
+
|
|
317
|
+
- [ ] **Step 5: Commit**
|
|
318
|
+
|
|
319
|
+
```bash
|
|
320
|
+
git add src/scripts/benchmark.ts tests/benchmark.test.ts
|
|
321
|
+
git commit -m "feat: add weighted benchmark scoring engine"
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
### Task 2: Add `saveBenchmarkReport` Function
|
|
327
|
+
|
|
328
|
+
**Files:**
|
|
329
|
+
- Modify: `src/scripts/benchmark.ts`
|
|
330
|
+
- Test: `tests/benchmark.test.ts`
|
|
331
|
+
|
|
332
|
+
- [ ] **Step 1: Write tests for saveBenchmarkReport**
|
|
333
|
+
|
|
334
|
+
```typescript
|
|
335
|
+
// Add to tests/benchmark.test.ts
|
|
336
|
+
import { saveBenchmarkReport } from '../src/scripts/benchmark';
|
|
337
|
+
|
|
338
|
+
describe('saveBenchmarkReport', () => {
|
|
339
|
+
it('saves benchmark report to .workspace-benchmarks directory', () => {
|
|
340
|
+
const data: BenchmarkResult = {
|
|
341
|
+
workspace: 'test-ws',
|
|
342
|
+
agent: 'opencode',
|
|
343
|
+
timestamp: '2026-04-07T00:00:00Z',
|
|
344
|
+
rawScore: 72,
|
|
345
|
+
weightedScore: 78,
|
|
346
|
+
stages: [],
|
|
347
|
+
fixSuggestions: [],
|
|
348
|
+
improvementPotential: false,
|
|
349
|
+
};
|
|
350
|
+
|
|
351
|
+
saveBenchmarkReport('/test-workspace', data);
|
|
352
|
+
|
|
353
|
+
expect(mockFs.mkdirSync).toHaveBeenCalledWith(
|
|
354
|
+
expect.stringContaining('.workspace-benchmarks'),
|
|
355
|
+
{ recursive: true }
|
|
356
|
+
);
|
|
357
|
+
expect(mockFs.writeFileSync).toHaveBeenCalledWith(
|
|
358
|
+
expect.stringContaining('test-ws-'),
|
|
359
|
+
expect.stringContaining('"weightedScore": 78'),
|
|
360
|
+
'utf-8'
|
|
361
|
+
);
|
|
362
|
+
});
|
|
363
|
+
});
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
- [ ] **Step 2: Run test to verify it fails**
|
|
367
|
+
|
|
368
|
+
Run: `npm test -- tests/benchmark.test.ts -t "saveBenchmarkReport"`
|
|
369
|
+
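Expected: FAIL because `saveBenchmarkReport` is not exported yet (a TypeScript "has no exported member" error, or "saveBenchmarkReport is not a function" at runtime)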
|
|
370
|
+
|
|
371
|
+
- [ ] **Step 3: Implement saveBenchmarkReport**
|
|
372
|
+
|
|
373
|
+
Add to `src/scripts/benchmark.ts`:
|
|
374
|
+
|
|
375
|
+
```typescript
|
|
376
|
+
export function saveBenchmarkReport(workspacePath: string, data: BenchmarkResult): string {
|
|
377
|
+
const reportDir = path.join(workspacePath, '.workspace-benchmarks');
|
|
378
|
+
fs.mkdirSync(reportDir, { recursive: true });
|
|
379
|
+
|
|
380
|
+
const filename = `${data.workspace}-${data.timestamp.replace(/[:.]/g, '-')}.json`;
|
|
381
|
+
const filePath = path.join(reportDir, filename);
|
|
382
|
+
|
|
383
|
+
fs.writeFileSync(filePath, JSON.stringify(data, null, 2), 'utf-8');
|
|
384
|
+
return filePath;
|
|
385
|
+
}
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
- [ ] **Step 4: Run tests to verify they pass**
|
|
389
|
+
|
|
390
|
+
Run: `npm test -- tests/benchmark.test.ts`
|
|
391
|
+
Expected: All 7 tests pass
|
|
392
|
+
|
|
393
|
+
- [ ] **Step 5: Commit**
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
git add src/scripts/benchmark.ts tests/benchmark.test.ts
|
|
397
|
+
git commit -m "feat: add saveBenchmarkReport function"
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
---
|
|
401
|
+
|
|
402
|
+
### Task 3: Enhance `src/install.ts` with Multi-Agent Targeting
|
|
403
|
+
|
|
404
|
+
**Files:**
|
|
405
|
+
- Modify: `src/install.ts`
|
|
406
|
+
- Test: `tests/install.test.ts`
|
|
407
|
+
|
|
408
|
+
- [ ] **Step 1: Write tests for agent-targeted installation**
|
|
409
|
+
|
|
410
|
+
```typescript
|
|
411
|
+
// Add to tests/install.test.ts
|
|
412
|
+
import { getAgentTargetPath } from '../src/install';
|
|
413
|
+
|
|
414
|
+
describe('getAgentTargetPath', () => {
|
|
415
|
+
it('returns default path for no agent', () => {
|
|
416
|
+
const result = getAgentTargetPath('/project-root', undefined);
|
|
417
|
+
expect(result).toBe('/project-root/.agents/skills/workspace-maxxing');
|
|
418
|
+
});
|
|
419
|
+
|
|
420
|
+
it('returns opencode path for --opencode flag', () => {
|
|
421
|
+
const result = getAgentTargetPath('/project-root', 'opencode');
|
|
422
|
+
expect(result).toBe('/project-root/.agents/skills/workspace-maxxing');
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
it('returns claude path for --claude flag', () => {
|
|
426
|
+
const result = getAgentTargetPath('/project-root', 'claude');
|
|
427
|
+
expect(result).toBe('/project-root/.claude/skills');
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
it('returns copilot path for --copilot flag', () => {
|
|
431
|
+
const result = getAgentTargetPath('/project-root', 'copilot');
|
|
432
|
+
expect(result).toBe('/project-root/.github/copilot-instructions');
|
|
433
|
+
});
|
|
434
|
+
|
|
435
|
+
it('returns gemini path for --gemini flag', () => {
|
|
436
|
+
const result = getAgentTargetPath('/project-root', 'gemini');
|
|
437
|
+
expect(result).toBe('/project-root/.gemini/skills');
|
|
438
|
+
});
|
|
439
|
+
});
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
- [ ] **Step 2: Run tests to verify they fail**
|
|
443
|
+
|
|
444
|
+
Run: `npm test -- tests/install.test.ts -t "getAgentTargetPath"`
|
|
445
|
+
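Expected: FAIL because `getAgentTargetPath` is not exported yet (a TypeScript "has no exported member" error, or "getAgentTargetPath is not a function" at runtime)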
|
|
446
|
+
|
|
447
|
+
- [ ] **Step 3: Implement agent targeting in install.ts**
|
|
448
|
+
|
|
449
|
+
Add to `src/install.ts`:
|
|
450
|
+
|
|
451
|
+
```typescript
|
|
452
|
+
export type AgentTarget = 'opencode' | 'claude' | 'copilot' | 'gemini' | undefined;
|
|
453
|
+
|
|
454
|
+
const AGENT_PATHS: Record<string, string> = {
|
|
455
|
+
opencode: '.agents/skills/workspace-maxxing',
|
|
456
|
+
claude: '.claude/skills',
|
|
457
|
+
copilot: '.github/copilot-instructions',
|
|
458
|
+
gemini: '.gemini/skills',
|
|
459
|
+
};
|
|
460
|
+
|
|
461
|
+
export function getAgentTargetPath(projectRoot: string, agent: AgentTarget): string {
|
|
462
|
+
const relativePath = AGENT_PATHS[agent ?? 'opencode'];
|
|
463
|
+
return path.join(projectRoot, relativePath);
|
|
464
|
+
}
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
Modify `installSkill` to accept agent parameter:
|
|
468
|
+
|
|
469
|
+
```typescript
|
|
470
|
+
export async function installSkill(
|
|
471
|
+
projectRoot: string,
|
|
472
|
+
templatesDir: string,
|
|
473
|
+
agent: AgentTarget = undefined,
|
|
474
|
+
): Promise<InstallResult> {
|
|
475
|
+
const skillDir = getAgentTargetPath(projectRoot, agent);
|
|
476
|
+
// ... rest of existing implementation unchanged
|
|
477
|
+
}
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
- [ ] **Step 4: Run tests to verify they pass**
|
|
481
|
+
|
|
482
|
+
Run: `npm test -- tests/install.test.ts`
|
|
483
|
+
Expected: All existing tests + 5 new tests pass
|
|
484
|
+
|
|
485
|
+
- [ ] **Step 5: Commit**
|
|
486
|
+
|
|
487
|
+
```bash
|
|
488
|
+
git add src/install.ts tests/install.test.ts
|
|
489
|
+
git commit -m "feat: add multi-agent installation targeting"
|
|
490
|
+
```
|
|
491
|
+
|
|
492
|
+
---
|
|
493
|
+
|
|
494
|
+
### Task 4: Update `src/index.ts` with New CLI Flags
|
|
495
|
+
|
|
496
|
+
**Files:**
|
|
497
|
+
- Modify: `src/index.ts`
|
|
498
|
+
- Test: `tests/cli.test.ts`
|
|
499
|
+
|
|
500
|
+
- [ ] **Step 1: Write tests for new CLI flags**
|
|
501
|
+
|
|
502
|
+
```typescript
|
|
503
|
+
// Add to tests/cli.test.ts
|
|
504
|
+
describe('CLI flags', () => {
|
|
505
|
+
it('accepts --claude flag', () => {
|
|
506
|
+
const stdout = execSync('node dist/index.js --claude', { encoding: 'utf-8',
|
|
507
|
+
env: { ...process.env, WORKSPACE_MAXXING_TEMPLATES: templatesDir },
|
|
508
|
+
});
|
|
509
|
+
expect(stdout).toContain('Skill installed to');
|
|
510
|
+
expect(stdout).toContain('.claude/skills');
|
|
511
|
+
});
|
|
512
|
+
|
|
513
|
+
it('accepts --copilot flag', () => {
|
|
514
|
+
const stdout = execSync('node dist/index.js --copilot', { encoding: 'utf-8',
|
|
515
|
+
env: { ...process.env, WORKSPACE_MAXXING_TEMPLATES: templatesDir },
|
|
516
|
+
});
|
|
517
|
+
expect(stdout).toContain('.github/copilot-instructions');
|
|
518
|
+
});
|
|
519
|
+
|
|
520
|
+
it('accepts --gemini flag', () => {
|
|
521
|
+
const stdout = execSync('node dist/index.js --gemini', { encoding: 'utf-8',
|
|
522
|
+
env: { ...process.env, WORKSPACE_MAXXING_TEMPLATES: templatesDir },
|
|
523
|
+
});
|
|
524
|
+
expect(stdout).toContain('.gemini/skills');
|
|
525
|
+
});
|
|
526
|
+
});
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
- [ ] **Step 2: Run tests to verify they fail**
|
|
530
|
+
|
|
531
|
+
Run: `npm test -- tests/cli.test.ts -t "CLI flags"`
|
|
532
|
+
Expected: FAIL (flags not recognized yet)
|
|
533
|
+
|
|
534
|
+
- [ ] **Step 3: Update CLI flag handling**
|
|
535
|
+
|
|
536
|
+
Modify `src/index.ts`:
|
|
537
|
+
|
|
538
|
+
```typescript
|
|
539
|
+
import { detectProjectRoot, installSkill, AgentTarget } from './install';
|
|
540
|
+
|
|
541
|
+
function showHelp(): void {
|
|
542
|
+
console.log(`
|
|
543
|
+
workspace-maxxing — npx-installable skill for AI agents
|
|
544
|
+
|
|
545
|
+
Usage:
|
|
546
|
+
npx workspace-maxxing [options]
|
|
547
|
+
|
|
548
|
+
Options:
|
|
549
|
+
--opencode Install skill for OpenCode agents (default)
|
|
550
|
+
--claude Install skill for Claude Code agents
|
|
551
|
+
--copilot Install skill for GitHub Copilot agents
|
|
552
|
+
--gemini Install skill for Gemini CLI agents
|
|
553
|
+
--help Show this help message
|
|
554
|
+
|
|
555
|
+
Examples:
|
|
556
|
+
npx workspace-maxxing --opencode
|
|
557
|
+
npx workspace-maxxing --claude
|
|
558
|
+
npx workspace-maxxing --copilot
|
|
559
|
+
npx workspace-maxxing --gemini
|
|
560
|
+
`);
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
async function main(): Promise<void> {
|
|
564
|
+
const args = process.argv.slice(2);
|
|
565
|
+
|
|
566
|
+
if (args.length === 0 || args.includes('--help')) {
|
|
567
|
+
showHelp();
|
|
568
|
+
process.exit(0);
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
const agentFlags: AgentTarget[] = ['opencode', 'claude', 'copilot', 'gemini'];
|
|
572
|
+
const detectedAgent = agentFlags.find((flag) => args.includes(`--${flag}`));
|
|
573
|
+
|
|
574
|
+
if (detectedAgent) {
|
|
575
|
+
const cwd = process.cwd();
|
|
576
|
+
const projectRoot = detectProjectRoot(cwd);
|
|
577
|
+
|
|
578
|
+
if (projectRoot !== cwd) {
|
|
579
|
+
console.log(`Detected project root: ${projectRoot}`);
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
const templatesDir =
|
|
583
|
+
process.env.WORKSPACE_MAXXING_TEMPLATES ??
|
|
584
|
+
path.join(__dirname, '..', 'templates');
|
|
585
|
+
|
|
586
|
+
console.log(`Installing workspace-maxxing skill for ${detectedAgent}...`);
|
|
587
|
+
const result = await installSkill(projectRoot, templatesDir, detectedAgent);
|
|
588
|
+
|
|
589
|
+
if (result.success) {
|
|
590
|
+
console.log(`Skill installed to: ${result.skillPath}`);
|
|
591
|
+
console.log(`Open a new ${detectedAgent} session and invoke the workspace-maxxing skill to get started.`);
|
|
592
|
+
} else {
|
|
593
|
+
console.error(`Installation failed: ${result.error}`);
|
|
594
|
+
process.exit(1);
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
return;
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
console.error(`Unknown flag: ${args.find((a) => a.startsWith('--')) ?? args[0]}`);
|
|
601
|
+
console.error('Run "npx workspace-maxxing --help" for usage.');
|
|
602
|
+
process.exit(1);
|
|
603
|
+
}
|
|
604
|
+
```
|
|
605
|
+
|
|
606
|
+
- [ ] **Step 4: Build and run tests**
|
|
607
|
+
|
|
608
|
+
Run: `npm run build && npm test -- tests/cli.test.ts`
|
|
609
|
+
Expected: All CLI tests pass
|
|
610
|
+
|
|
611
|
+
- [ ] **Step 5: Commit**
|
|
612
|
+
|
|
613
|
+
```bash
|
|
614
|
+
git add src/index.ts tests/cli.test.ts
|
|
615
|
+
git commit -m "feat: add --claude, --copilot, --gemini CLI flags"
|
|
616
|
+
```
|
|
617
|
+
|
|
618
|
+
---
|
|
619
|
+
|
|
620
|
+
### Task 5: Extend `src/scripts/iterate.ts` with Benchmark Data
|
|
621
|
+
|
|
622
|
+
**Files:**
|
|
623
|
+
- Modify: `src/scripts/iterate.ts`
|
|
624
|
+
- Test: `tests/iterate.test.ts`
|
|
625
|
+
|
|
626
|
+
- [ ] **Step 1: Write tests for benchmark integration**
|
|
627
|
+
|
|
628
|
+
```typescript
|
|
629
|
+
// Add to tests/iterate.test.ts
|
|
630
|
+
import { iterateWorkspace } from '../src/scripts/iterate';
|
|
631
|
+
import * as benchmark from '../src/scripts/benchmark';
|
|
632
|
+
|
|
633
|
+
jest.mock('../src/scripts/benchmark');
|
|
634
|
+
|
|
635
|
+
describe('iterateWorkspace with benchmark', () => {
|
|
636
|
+
it('includes benchmark data in result', () => {
|
|
637
|
+
(benchmark.calculateBenchmark as jest.Mock).mockReturnValue({
|
|
638
|
+
workspace: 'test-ws',
|
|
639
|
+
agent: 'opencode',
|
|
640
|
+
timestamp: '2026-04-07T00:00:00Z',
|
|
641
|
+
rawScore: 72,
|
|
642
|
+
weightedScore: 78,
|
|
643
|
+
stages: [{ name: '01-ideation', raw: 85, weight: 1.5, weighted: 95 }],
|
|
644
|
+
fixSuggestions: ['Add research sources'],
|
|
645
|
+
improvementPotential: true,
|
|
646
|
+
});
|
|
647
|
+
|
|
648
|
+
const result = iterateWorkspace('/test-workspace');
|
|
649
|
+
|
|
650
|
+
expect(result.benchmark).toBeDefined();
|
|
651
|
+
expect(result.benchmark?.weightedScore).toBe(78);
|
|
652
|
+
expect(result.benchmark?.improvementPotential).toBe(true);
|
|
653
|
+
});
|
|
654
|
+
|
|
655
|
+
it('passes agent flag to benchmark result', () => {
|
|
656
|
+
(benchmark.calculateBenchmark as jest.Mock).mockReturnValue({
|
|
657
|
+
workspace: 'test-ws',
|
|
658
|
+
agent: 'claude',
|
|
659
|
+
timestamp: '2026-04-07T00:00:00Z',
|
|
660
|
+
rawScore: 72,
|
|
661
|
+
weightedScore: 78,
|
|
662
|
+
stages: [],
|
|
663
|
+
fixSuggestions: [],
|
|
664
|
+
improvementPotential: false,
|
|
665
|
+
});
|
|
666
|
+
|
|
667
|
+
const result = iterateWorkspace('/test-workspace', { agent: 'claude' });
|
|
668
|
+
|
|
669
|
+
expect(result.benchmark?.agent).toBe('claude');
|
|
670
|
+
});
|
|
671
|
+
});
|
|
672
|
+
```
|
|
673
|
+
|
|
674
|
+
- [ ] **Step 2: Run tests to verify they fail**
|
|
675
|
+
|
|
676
|
+
Run: `npm test -- tests/iterate.test.ts -t "benchmark"`
|
|
677
|
+
Expected: FAIL (benchmark integration not implemented)
|
|
678
|
+
|
|
679
|
+
- [ ] **Step 3: Update IterateResult interface and iterateWorkspace function**
|
|
680
|
+
|
|
681
|
+
Modify `src/scripts/iterate.ts`:
|
|
682
|
+
|
|
683
|
+
```typescript
|
|
684
|
+
import { calculateBenchmark, BenchmarkResult } from './benchmark';
|
|
685
|
+
|
|
686
|
+
export interface IterateOptions {
|
|
687
|
+
maxRetries?: number;
|
|
688
|
+
agent?: string;
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
export interface IterateResult {
|
|
692
|
+
passes: {
|
|
693
|
+
validate: ValidatePassResult;
|
|
694
|
+
score: ScorePassResult;
|
|
695
|
+
checklist: ChecklistResult;
|
|
696
|
+
};
|
|
697
|
+
benchmark?: BenchmarkResult;
|
|
698
|
+
escalate: boolean;
|
|
699
|
+
}
|
|
700
|
+
```
|
|
701
|
+
|
|
702
|
+
Update `iterateWorkspace` function:
|
|
703
|
+
|
|
704
|
+
```typescript
|
|
705
|
+
export function iterateWorkspace(
|
|
706
|
+
workspacePath: string,
|
|
707
|
+
options: IterateOptions = {},
|
|
708
|
+
): IterateResult {
|
|
709
|
+
const { maxRetries = 3, agent = 'unknown' } = options;
|
|
710
|
+
const ws = path.resolve(workspacePath);
|
|
711
|
+
|
|
712
|
+
const validateResult = runValidatePass(ws, maxRetries);
|
|
713
|
+
const scoreResult = runScorePass(ws);
|
|
714
|
+
const checklistResult = runChecklist(ws);
|
|
715
|
+
const benchmarkResult = calculateBenchmark(ws);
|
|
716
|
+
benchmarkResult.agent = agent;
|
|
717
|
+
|
|
718
|
+
const result: IterateResult = {
|
|
719
|
+
passes: {
|
|
720
|
+
validate: validateResult,
|
|
721
|
+
score: scoreResult,
|
|
722
|
+
checklist: checklistResult,
|
|
723
|
+
},
|
|
724
|
+
benchmark: benchmarkResult,
|
|
725
|
+
escalate: validateResult.status === 'escalated',
|
|
726
|
+
};
|
|
727
|
+
|
|
728
|
+
console.log(JSON.stringify(result, null, 2));
|
|
729
|
+
|
|
730
|
+
return result;
|
|
731
|
+
}
|
|
732
|
+
```
|
|
733
|
+
|
|
734
|
+
- [ ] **Step 4: Run tests to verify they pass**
|
|
735
|
+
|
|
736
|
+
Run: `npm test -- tests/iterate.test.ts`
|
|
737
|
+
Expected: All iterate tests pass
|
|
738
|
+
|
|
739
|
+
- [ ] **Step 5: Commit**
|
|
740
|
+
|
|
741
|
+
```bash
|
|
742
|
+
git add src/scripts/iterate.ts tests/iterate.test.ts
|
|
743
|
+
git commit -m "feat: integrate benchmark data into iterate results"
|
|
744
|
+
```
|
|
745
|
+
|
|
746
|
+
---
|
|
747
|
+
|
|
748
|
+
### Task 6: Copy benchmark.ts to Templates
|
|
749
|
+
|
|
750
|
+
**Files:**
|
|
751
|
+
- Create: `templates/.workspace-templates/scripts/benchmark.ts` (copied from `src/scripts/benchmark.ts`)
|
|
752
|
+
- Verify: `src/install.ts` (confirm benchmark.ts is copied during install; no code change expected)
|
|
753
|
+
|
|
754
|
+
- [ ] **Step 1: Copy benchmark.ts to templates**
|
|
755
|
+
|
|
756
|
+
```bash
|
|
757
|
+
cp src/scripts/benchmark.ts templates/.workspace-templates/scripts/
|
|
758
|
+
```
|
|
759
|
+
|
|
760
|
+
- [ ] **Step 2: Verify installer copies benchmark.ts**
|
|
761
|
+
|
|
762
|
+
The installer already copies everything from `templates/.workspace-templates/scripts/` to the skill directory. Verify by checking `src/install.ts` lines 79-83: the installer copies the entire scripts directory recursively, so benchmark.ts will be included automatically.
|
|
763
|
+
|
|
764
|
+
- [ ] **Step 3: Run full test suite**
|
|
765
|
+
|
|
766
|
+
Run: `npm test`
|
|
767
|
+
Expected: All tests pass (baseline 75 + new tests)
|
|
768
|
+
|
|
769
|
+
- [ ] **Step 4: Commit**
|
|
770
|
+
|
|
771
|
+
```bash
|
|
772
|
+
git add templates/.workspace-templates/scripts/benchmark.ts
|
|
773
|
+
git commit -m "feat: include benchmark script in templates"
|
|
774
|
+
```
|
|
775
|
+
|
|
776
|
+
---
|
|
777
|
+
|
|
778
|
+
### Task 7: Update Templates SKILL.md
|
|
779
|
+
|
|
780
|
+
**Files:**
|
|
781
|
+
- Modify: `templates/SKILL.md`
|
|
782
|
+
|
|
783
|
+
- [ ] **Step 1: Add benchmark section to SKILL.md**
|
|
784
|
+
|
|
785
|
+
Add to `templates/SKILL.md` after the "Autonomous Iteration" section:
|
|
786
|
+
|
|
787
|
+
```markdown
|
|
788
|
+
## Benchmarking
|
|
789
|
+
|
|
790
|
+
Run benchmarks to assess workspace quality with weighted scoring:
|
|
791
|
+
|
|
792
|
+
\`\`\`bash
|
|
793
|
+
node .agents/skills/workspace-maxxing/scripts/benchmark.ts --workspace <workspace-path>
|
|
794
|
+
\`\`\`
|
|
795
|
+
|
|
796
|
+
**Weights:**
|
|
797
|
+
- `01-ideation`: 1.5x (core thinking quality)
|
|
798
|
+
- `02-research`: 1.3x (evidence gathering)
|
|
799
|
+
- `03-architecture`: 1.2x (structural decisions)
|
|
800
|
+
- All other stages: 1.0x
|
|
801
|
+
|
|
802
|
+
**Output:**
|
|
803
|
+
- Console: Formatted table with stage scores and suggestions
|
|
804
|
+
- JSON: Saved to `.workspace-benchmarks/<workspace>-<timestamp>.json`
|
|
805
|
+
|
|
806
|
+
**Integration with Iteration:**
|
|
807
|
+
The `iterate.ts` script now includes benchmark data in its return value. Use the `improvementPotential` field to decide whether to continue iterating.
|
|
808
|
+
```
|
|
809
|
+
|
|
810
|
+
- [ ] **Step 2: Update Available Scripts section**
|
|
811
|
+
|
|
812
|
+
Add benchmark.ts to the "Available Scripts" table in SKILL.md:
|
|
813
|
+
|
|
814
|
+
```markdown
|
|
815
|
+
| `benchmark.ts` | Run weighted benchmark scoring on a workspace |
|
|
816
|
+
```
|
|
817
|
+
|
|
818
|
+
- [ ] **Step 3: Run template tests**
|
|
819
|
+
|
|
820
|
+
Run: `npm test -- tests/templates.test.ts`
|
|
821
|
+
Expected: All template tests pass
|
|
822
|
+
|
|
823
|
+
- [ ] **Step 4: Commit**
|
|
824
|
+
|
|
825
|
+
```bash
|
|
826
|
+
git add templates/SKILL.md
|
|
827
|
+
git commit -m "docs: add benchmarking section to SKILL.md"
|
|
828
|
+
```
|
|
829
|
+
|
|
830
|
+
---
|
|
831
|
+
|
|
832
|
+
### Task 8: Full Test Suite & Final Commit
|
|
833
|
+
|
|
834
|
+
- [ ] **Step 1: Run full test suite**
|
|
835
|
+
|
|
836
|
+
Run: `npm test`
|
|
837
|
+
Expected: All tests pass (baseline 75 + all new tests from Tasks 1-7)
|
|
838
|
+
|
|
839
|
+
- [ ] **Step 2: Run build**
|
|
840
|
+
|
|
841
|
+
Run: `npm run build`
|
|
842
|
+
Expected: Build succeeds
|
|
843
|
+
|
|
844
|
+
- [ ] **Step 3: Run integration test**
|
|
845
|
+
|
|
846
|
+
Run: `npm test -- tests/integration.test.ts`
|
|
847
|
+
Expected: Integration tests pass
|
|
848
|
+
|
|
849
|
+
- [ ] **Step 4: Final commit**
|
|
850
|
+
|
|
851
|
+
```bash
|
|
852
|
+
git add .
|
|
853
|
+
git commit -m "feat(sub-project-4): complete benchmarking & multi-agent support"
|
|
854
|
+
```
|