@aion0/forge 0.5.23 → 0.5.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RELEASE_NOTES.md +5 -6
- package/app/api/smith-templates/route.ts +81 -0
- package/components/WorkspaceView.tsx +841 -83
- package/docs/Forge_Memory_Layer_Design.docx +0 -0
- package/docs/Forge_Strategy_Research_2026.docx +0 -0
- package/lib/claude-sessions.ts +2 -1
- package/lib/forge-mcp-server.ts +247 -33
- package/lib/help-docs/11-workspace.md +722 -166
- package/lib/project-sessions.ts +1 -1
- package/lib/telegram-bot.ts +1 -1
- package/lib/workspace/orchestrator.ts +263 -76
- package/lib/workspace/presets.ts +535 -58
- package/lib/workspace/requests.ts +287 -0
- package/lib/workspace/session-monitor.ts +4 -3
- package/lib/workspace/types.ts +1 -0
- package/lib/workspace/watch-manager.ts +1 -1
- package/lib/workspace-standalone.ts +1 -1
- package/next-env.d.ts +1 -1
- package/package.json +1 -1
- package/pnpm-workspace.yaml +1 -0
- package/scripts/bench/README.md +66 -0
- package/scripts/bench/results/.gitignore +2 -0
- package/scripts/bench/run.ts +635 -0
- package/scripts/bench/tasks/01-text-utils/task.md +26 -0
- package/scripts/bench/tasks/01-text-utils/validator.sh +46 -0
- package/scripts/bench/tasks/02-pagination/setup.sh +19 -0
- package/scripts/bench/tasks/02-pagination/task.md +48 -0
- package/scripts/bench/tasks/02-pagination/validator.sh +69 -0
- package/scripts/bench/tasks/03-bug-fix/setup.sh +82 -0
- package/scripts/bench/tasks/03-bug-fix/task.md +30 -0
- package/scripts/bench/tasks/03-bug-fix/validator.sh +29 -0
- package/templates/smith-lead.json +45 -0
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Request/Response Document System — structured YAML documents for
|
|
3
|
+
* multi-agent delivery workflows.
|
|
4
|
+
*
|
|
5
|
+
* Storage layout:
|
|
6
|
+
* <project>/.forge/requests/<id>/
|
|
7
|
+
* ├── request.yml — created by Architect
|
|
8
|
+
* └── response.yml — updated by Engineer, Reviewer, QA
|
|
9
|
+
*
|
|
10
|
+
* Inspired by Accord protocol: YAML frontmatter + structured content,
|
|
11
|
+
* status lifecycle (open → in_progress → review → qa → done).
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync } from 'node:fs';
|
|
15
|
+
import { join } from 'node:path';
|
|
16
|
+
import YAML from 'yaml';
|
|
17
|
+
|
|
18
|
+
// ─── Types ──────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
/** Lifecycle state of a request (open → in_progress → review → qa → done, or rejected). */
export type RequestStatus =
  | 'open'
  | 'in_progress'
  | 'review'
  | 'qa'
  | 'done'
  | 'rejected';

/** Urgency level attached to a request. */
export type RequestPriority =
  | 'high'
  | 'medium'
  | 'low';

/** Category of work a request describes. */
export type RequestType =
  | 'feature'
  | 'bugfix'
  | 'refactor'
  | 'task';
|
|
23
|
+
|
|
24
|
+
/** One named unit of work inside a request, together with its acceptance criteria. */
export interface RequestModule {
  /** Module name. */
  name: string;
  /** Free-text description of the module. */
  description: string;
  /** List of acceptance criteria for the module. */
  acceptance_criteria: string[];
}
|
|
29
|
+
|
|
30
|
+
/** Shape of the `request.yml` document stored under `.forge/requests/<id>/`. */
export interface RequestDocument {
  /** Request identifier; also the name of the request's directory. */
  id: string;
  /** Groups requests into a delivery. */
  batch: string;
  title: string;
  description: string;
  type: RequestType;
  modules: RequestModule[];
  priority: RequestPriority;
  /** Current lifecycle state. */
  status: RequestStatus;
  /** Agent label of the assignee. */
  assigned_to: string;
  /** Agent label of the creator. */
  created_by: string;
  /** ISO timestamp of creation. */
  created_at: string;
  /** ISO timestamp of the last modification. */
  updated_at: string;
}
|
|
44
|
+
|
|
45
|
+
/** Engineer section of a response document. */
export interface EngineerResponse {
  /** ISO timestamp recorded when the section was written. */
  completed_at?: string;
  /** Paths of the files the engineer changed. */
  files_changed: string[];
  /** Free-text notes. */
  notes: string;
}
|
|
50
|
+
|
|
51
|
+
/** Reviewer section of a response document. */
export interface ReviewResponse {
  /** ISO timestamp recorded when the section was written. */
  completed_at?: string;
  /** Review verdict. */
  result: 'approved' | 'changes_requested' | 'rejected';
  /** Individual review findings. */
  findings: { severity: string; description: string }[];
}
|
|
56
|
+
|
|
57
|
+
/** QA section of a response document. */
export interface QaResponse {
  /** ISO timestamp recorded when the section was written. */
  completed_at?: string;
  /** QA verdict. */
  result: 'passed' | 'failed';
  /** Paths of the test files associated with the QA run. */
  test_files: string[];
  /** Individual QA findings. */
  findings: { severity: string; description: string }[];
}
|
|
63
|
+
|
|
64
|
+
/** Shape of the `response.yml` document stored next to `request.yml`. */
export interface ResponseDocument {
  /** Id of the request this response belongs to. */
  request_id: string;
  /** Lifecycle state of the request as recorded in the response document. */
  status: RequestStatus;
  /** Engineer section, present once written. */
  engineer?: EngineerResponse;
  /** Review section, present once written. */
  review?: ReviewResponse;
  /** QA section, present once written. */
  qa?: QaResponse;
}
|
|
71
|
+
|
|
72
|
+
// ─── Paths ──────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
/**
 * Resolves the directory that contains every request folder of a project.
 *
 * @param projectPath Root directory of the project.
 * @returns `<projectPath>/.forge/requests`.
 */
function requestsRoot(projectPath: string): string {
  const segments = [projectPath, '.forge', 'requests'];
  return join(...segments);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Resolves the folder of a single request.
 *
 * NOTE(review): `requestId` is joined as-is, so an id containing path
 * separators or `..` resolves outside the requests root. Callers that accept
 * ids from agents or users should validate them first — confirm at call sites.
 *
 * @param projectPath Root directory of the project.
 * @param requestId Request identifier, used as the folder name.
 * @returns `<projectPath>/.forge/requests/<requestId>`.
 */
function requestDir(projectPath: string, requestId: string): string {
  const root = requestsRoot(projectPath);
  return join(root, requestId);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Creates `dir` together with any missing parent directories.
 *
 * Calling it on a directory that already exists is a no-op.
 *
 * @param dir Directory path to create.
 */
function ensureDir(dir: string): void {
  // `recursive: true` already tolerates an existing directory, so a separate
  // existence check would only add a check-then-create race.
  mkdirSync(dir, { recursive: true });
}
|
|
85
|
+
|
|
86
|
+
// ─── CRUD ───────────────────────────────────────────────
|
|
87
|
+
|
|
88
|
+
/**
 * Generate a request ID of the form `REQ-YYYYMMDD-NNN`.
 *
 * The date part uses the local calendar date. The sequence part is one more
 * than the highest numeric suffix already present for that date, so an id is
 * never re-issued after an earlier request folder was removed or when
 * unrelated entries share the prefix.
 *
 * @param projectPath Root directory of the project.
 * @returns The next free request id for today.
 */
export function generateRequestId(projectPath: string): string {
  const now = new Date();
  const date = `${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, '0')}${String(now.getDate()).padStart(2, '0')}`;
  const prefix = `REQ-${date}-`;

  const root = requestsRoot(projectPath);
  if (!existsSync(root)) return `${prefix}001`;

  let highest = 0;
  for (const name of readdirSync(root)) {
    if (!name.startsWith(prefix)) continue;
    const suffix = name.slice(prefix.length);
    // Ignore entries whose suffix is not a plain number (e.g. custom ids).
    if (!/^\d+$/.test(suffix)) continue;
    highest = Math.max(highest, Number(suffix));
  }

  return `${prefix}${String(highest + 1).padStart(3, '0')}`;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Write a new `request.yml` for a request.
 *
 * Uses `doc.id` when provided, otherwise generates an id. `created_at` and
 * `updated_at` are both set to the current time, and `status` falls back to
 * `'open'` when the supplied value is falsy.
 *
 * @param projectPath Root directory of the project.
 * @param doc Request fields; `id` is optional, timestamps are filled in here.
 * @returns Relative path to the written `request.yml` (for use as ref in bus messages).
 */
export function createRequest(projectPath: string, doc: Omit<RequestDocument, 'id' | 'created_at' | 'updated_at'> & { id?: string }): string {
  const requestId = doc.id || generateRequestId(projectPath);
  const targetDir = requestDir(projectPath, requestId);
  ensureDir(targetDir);

  const timestamp = new Date().toISOString();
  const record: RequestDocument = {
    ...doc,
    id: requestId,
    status: doc.status || 'open',
    created_at: timestamp,
    updated_at: timestamp,
  };

  const serialized = YAML.stringify(record);
  writeFileSync(join(targetDir, 'request.yml'), serialized, 'utf-8');
  console.log(`[requests] Created ${requestId}: ${doc.title}`);

  // Relative path, suitable as a bus ref
  return `.forge/requests/${requestId}/request.yml`;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Get a request and its optional response.
 *
 * @param projectPath Root directory of the project.
 * @param requestId Identifier of the request to load.
 * @returns The parsed request (plus the response when `response.yml` exists
 *   and holds a mapping), or `null` when `request.yml` is missing, cannot be
 *   read or parsed, or does not contain a YAML mapping.
 */
export function getRequest(projectPath: string, requestId: string): { request: RequestDocument; response?: ResponseDocument } | null {
  const dir = requestDir(projectPath, requestId);
  const reqFile = join(dir, 'request.yml');
  if (!existsSync(reqFile)) return null;

  const isMapping = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  try {
    const parsedRequest: unknown = YAML.parse(readFileSync(reqFile, 'utf-8'));
    // An empty or scalar-only file parses to a non-object; never hand that back as a request.
    if (!isMapping(parsedRequest)) {
      console.error(`[requests] Failed to read ${requestId}: request.yml is not a YAML mapping`);
      return null;
    }
    const request = parsedRequest as unknown as RequestDocument;

    let response: ResponseDocument | undefined;
    const resFile = join(dir, 'response.yml');
    if (existsSync(resFile)) {
      const parsedResponse: unknown = YAML.parse(readFileSync(resFile, 'utf-8'));
      // An empty response file is treated the same as a missing one.
      if (isMapping(parsedResponse)) response = parsedResponse as unknown as ResponseDocument;
    }
    return { request, response };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[requests] Failed to read ${requestId}: ${message}`);
    return null;
  }
}
|
|
148
|
+
|
|
149
|
+
/**
 * List all requests, optionally filtered by batch or status.
 *
 * Only direct sub-directories of the requests root that contain a
 * `request.yml` are considered. Documents that cannot be read or parsed, or
 * that are not YAML mappings, are skipped with a logged error rather than
 * aborting the listing.
 *
 * @param projectPath Root directory of the project.
 * @param opts Optional filters; a request must match every filter given.
 * @returns Matching requests sorted by `created_at` ascending (documents
 *   without a `created_at` sort first).
 */
export function listRequests(projectPath: string, opts?: { batch?: string; status?: RequestStatus }): RequestDocument[] {
  const root = requestsRoot(projectPath);
  if (!existsSync(root)) return [];

  const results: RequestDocument[] = [];
  for (const entry of readdirSync(root, { withFileTypes: true })) {
    if (!entry.isDirectory()) continue;
    const reqFile = join(root, entry.name, 'request.yml');
    if (!existsSync(reqFile)) continue;
    try {
      const parsed: unknown = YAML.parse(readFileSync(reqFile, 'utf-8'));
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        console.error(`[requests] Skipping ${entry.name}: request.yml is not a YAML mapping`);
        continue;
      }
      const doc = parsed as RequestDocument;
      if (opts?.batch && doc.batch !== opts.batch) continue;
      if (opts?.status && doc.status !== opts.status) continue;
      results.push(doc);
    } catch (err: unknown) {
      // Best effort: one broken document must not hide the others, but leave a trace.
      const message = err instanceof Error ? err.message : String(err);
      console.error(`[requests] Skipping ${entry.name}: ${message}`);
    }
  }

  // Coerce defensively so a hand-edited document without created_at cannot crash the sort.
  const sortKey = (doc: RequestDocument): string => String(doc.created_at ?? '');
  return results.sort((a, b) => sortKey(a).localeCompare(sortKey(b)));
}
|
|
171
|
+
|
|
172
|
+
/**
 * Update a response document section (engineer, review, or qa).
 *
 * The section is replaced by `data` plus a `completed_at` timestamp, and the
 * status is advanced in both `response.yml` and `request.yml`:
 *   - engineer → `review`
 *   - review: `approved` → `qa`, `changes_requested` → `in_progress`,
 *     `rejected` → `rejected`
 *   - qa: `passed` → `done`, anything else → `in_progress` (back to engineer)
 *
 * @param projectPath Root directory of the project.
 * @param requestId Identifier of the request being answered.
 * @param section Which section of the response to write.
 * @param data Section payload; `data.result` drives the status for review/qa.
 * @returns Relative path to `response.yml`.
 * @throws Error when the request does not exist, or when a review section
 *   carries a `result` other than the three recognised values (nothing is
 *   written in that case).
 */
export function updateResponse(
  projectPath: string,
  requestId: string,
  section: 'engineer' | 'review' | 'qa',
  data: Record<string, any>,
): string {
  const dir = requestDir(projectPath, requestId);
  const reqFile = join(dir, 'request.yml');
  if (!existsSync(reqFile)) throw new Error(`Request ${requestId} not found`);

  // Fail closed before touching disk: an unknown or missing review verdict
  // must not silently advance the request to QA.
  if (section === 'review' && !['approved', 'changes_requested', 'rejected'].includes(data.result)) {
    throw new Error(
      `Invalid review result ${JSON.stringify(data.result)} for ${requestId}: expected 'approved', 'changes_requested' or 'rejected'`,
    );
  }

  const resFile = join(dir, 'response.yml');

  // Load the existing response, or start a fresh one when the file is missing or empty.
  const existing: unknown = existsSync(resFile) ? YAML.parse(readFileSync(resFile, 'utf-8')) : null;
  const response: ResponseDocument =
    typeof existing === 'object' && existing !== null && !Array.isArray(existing)
      ? (existing as ResponseDocument)
      : { request_id: requestId, status: 'in_progress' };

  const now = new Date().toISOString();
  const entry = { ...data, completed_at: now };

  const request: RequestDocument = YAML.parse(readFileSync(reqFile, 'utf-8'));
  let newStatus: RequestStatus = request.status;

  // Store the section and derive the next lifecycle state from it.
  switch (section) {
    case 'engineer':
      response.engineer = entry as EngineerResponse;
      newStatus = 'review';
      break;
    case 'review':
      response.review = entry as ReviewResponse;
      newStatus = data.result === 'rejected' ? 'rejected' : data.result === 'changes_requested' ? 'in_progress' : 'qa';
      break;
    case 'qa':
      response.qa = entry as QaResponse;
      newStatus = data.result === 'passed' ? 'done' : 'in_progress'; // failed → back to engineer
      break;
  }

  response.status = newStatus;
  request.status = newStatus;
  request.updated_at = now;

  // Write both files
  writeFileSync(resFile, YAML.stringify(response), 'utf-8');
  writeFileSync(reqFile, YAML.stringify(request), 'utf-8');

  console.log(`[requests] ${requestId}: ${section} updated → status=${newStatus}`);
  return `.forge/requests/${requestId}/response.yml`;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Claim a request — set `assigned_to` and move the status to `in_progress`.
 *
 * Outcomes:
 *   - assigned to a different agent → `{ ok: false, claimedBy: <that agent> }`
 *   - assigned to this agent and already past `open` → `{ ok: true }` (idempotent, no write)
 *   - unassigned but not `open` → `{ ok: false, claimedBy: '(status: <status>)' }`
 *   - `open` and unassigned, or `open` and pre-assigned to this agent →
 *     claimed: `assigned_to`, `status` and `updated_at` are written
 *
 * NOTE(review): the read-check-write sequence is not atomic; two agents
 * claiming at the same moment could both succeed.
 *
 * @param projectPath Root directory of the project.
 * @param requestId Identifier of the request to claim.
 * @param agentLabel Label of the claiming agent.
 * @returns `ok` plus, on refusal, who or what blocks the claim.
 * @throws Error when the request does not exist.
 */
export function claimRequest(projectPath: string, requestId: string, agentLabel: string): { ok: boolean; claimedBy?: string } {
  const dir = requestDir(projectPath, requestId);
  const reqFile = join(dir, 'request.yml');
  if (!existsSync(reqFile)) throw new Error(`Request ${requestId} not found`);

  const doc: RequestDocument = YAML.parse(readFileSync(reqFile, 'utf-8'));

  // Already claimed by someone else
  if (doc.assigned_to && doc.assigned_to !== agentLabel) {
    return { ok: false, claimedBy: doc.assigned_to };
  }

  // Already owned by this agent and already in progress or later: nothing to do.
  // A request that is pre-assigned but still 'open' falls through so that the
  // claim actually moves it to 'in_progress'.
  if (doc.assigned_to === agentLabel && doc.status !== 'open') {
    return { ok: true };
  }

  // Only open requests can be claimed
  if (doc.status !== 'open') {
    return { ok: false, claimedBy: `(status: ${doc.status})` };
  }

  doc.assigned_to = agentLabel;
  doc.status = 'in_progress';
  doc.updated_at = new Date().toISOString();
  writeFileSync(reqFile, YAML.stringify(doc), 'utf-8');
  console.log(`[requests] ${requestId}: claimed by ${agentLabel}`);
  return { ok: true };
}
|
|
260
|
+
|
|
261
|
+
/**
 * Manually set the status of a request.
 *
 * Rewrites `request.yml` with the new status and a fresh `updated_at`;
 * `response.yml` is not touched.
 *
 * @param projectPath Root directory of the project.
 * @param requestId Identifier of the request to modify.
 * @param status Status to store.
 * @throws Error when the request does not exist.
 */
export function updateRequestStatus(projectPath: string, requestId: string, status: RequestStatus): void {
  const requestFile = join(requestDir(projectPath, requestId), 'request.yml');
  if (!existsSync(requestFile)) {
    throw new Error(`Request ${requestId} not found`);
  }

  const current: RequestDocument = YAML.parse(readFileSync(requestFile, 'utf-8'));
  current.status = status;
  current.updated_at = new Date().toISOString();

  const serialized = YAML.stringify(current);
  writeFileSync(requestFile, serialized, 'utf-8');
}
|
|
274
|
+
|
|
275
|
+
/**
 * Summarise how far a batch has progressed.
 *
 * A request counts as finished when its status is `done` or `rejected`.
 *
 * @param projectPath Root directory of the project.
 * @param batch Batch name to summarise.
 * @returns Total and finished counts, whether a non-empty batch is entirely
 *   finished, and an id/title/status summary of every request in the batch.
 */
export function getBatchStatus(projectPath: string, batch: string): { total: number; done: number; allDone: boolean; requests: Array<{ id: string; title: string; status: RequestStatus }> } {
  const batchRequests = listRequests(projectPath, { batch });
  const total = batchRequests.length;

  let done = 0;
  for (const { status } of batchRequests) {
    if (status === 'done' || status === 'rejected') done += 1;
  }

  const requests = batchRequests.map(({ id, title, status }) => ({ id, title, status }));
  const allDone = total > 0 && done === total;

  return { total, done, allDone, requests };
}
|
|
@@ -28,8 +28,8 @@ export interface SessionMonitorEvent {
|
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
const POLL_INTERVAL = 3000; // check every 3s
|
|
31
|
-
const IDLE_THRESHOLD =
|
|
32
|
-
const STABLE_THRESHOLD =
|
|
31
|
+
const IDLE_THRESHOLD = 29 * 60 * 1000; // 29min of no file change → check for result entry
|
|
32
|
+
const STABLE_THRESHOLD = 30 * 60 * 1000; // 30min of no change → force done (fallback if hook missed)
|
|
33
33
|
|
|
34
34
|
export class SessionFileMonitor extends EventEmitter {
|
|
35
35
|
private timers = new Map<string, NodeJS.Timeout>();
|
|
@@ -108,7 +108,8 @@ export class SessionFileMonitor extends EventEmitter {
|
|
|
108
108
|
static resolveSessionPath(projectPath: string, workDir: string | undefined, sessionId: string): string {
|
|
109
109
|
const fullPath = workDir && workDir !== './' && workDir !== '.'
|
|
110
110
|
? join(projectPath, workDir) : projectPath;
|
|
111
|
-
|
|
111
|
+
// Claude Code encodes paths by replacing all non-alphanumeric chars with '-'
|
|
112
|
+
const encoded = resolve(fullPath).replace(/[^a-zA-Z0-9]/g, '-');
|
|
112
113
|
return join(homedir(), '.claude', 'projects', encoded, `${sessionId}.jsonl`);
|
|
113
114
|
}
|
|
114
115
|
|
package/lib/workspace/types.ts
CHANGED
|
@@ -147,6 +147,7 @@ export interface BusMessage {
|
|
|
147
147
|
content?: string; // natural language message
|
|
148
148
|
files?: string[]; // related file paths
|
|
149
149
|
replyTo?: string; // reply to which message ID
|
|
150
|
+
ref?: string; // path to request/response document (reference-based notification)
|
|
150
151
|
};
|
|
151
152
|
timestamp: number;
|
|
152
153
|
// Delivery tracking
|
|
@@ -212,7 +212,7 @@ const lastSessionFile = new Map<string, string>();
|
|
|
212
212
|
|
|
213
213
|
function detectSessionChanges(projectPath: string, pattern: string | undefined, prevLineCount: number, contextChars = 500, sessionId?: string): { changes: WatchChange | null; lineCount: number } {
|
|
214
214
|
const claudeHome = join(homedir(), '.claude', 'projects');
|
|
215
|
-
const encoded = projectPath.replace(
|
|
215
|
+
const encoded = projectPath.replace(/[^a-zA-Z0-9]/g, '-');
|
|
216
216
|
const sessionDir = join(claudeHome, encoded);
|
|
217
217
|
if (!existsSync(sessionDir)) return { changes: null, lineCount: prevLineCount };
|
|
218
218
|
|
|
@@ -688,7 +688,7 @@ async function handleSmith(id: string, body: any, res: ServerResponse): Promise<
|
|
|
688
688
|
const agentConfig = agentId ? orch.getSnapshot().agents.find(a => a.id === agentId) : null;
|
|
689
689
|
const agentWorkDir = agentConfig?.workDir && agentConfig.workDir !== './' && agentConfig.workDir !== '.'
|
|
690
690
|
? join(orch.projectPath, agentConfig.workDir) : orch.projectPath;
|
|
691
|
-
const encoded = resolve(agentWorkDir).replace(
|
|
691
|
+
const encoded = resolve(agentWorkDir).replace(/[^a-zA-Z0-9]/g, '-');
|
|
692
692
|
const sessDir = join(homedir(), '.claude', 'projects', encoded);
|
|
693
693
|
const entries = readdirSync(sessDir);
|
|
694
694
|
const files = entries
|
package/next-env.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/// <reference types="next" />
|
|
2
2
|
/// <reference types="next/image-types/global" />
|
|
3
|
-
import "./.next/
|
|
3
|
+
import "./.next/types/routes.d.ts";
|
|
4
4
|
|
|
5
5
|
// NOTE: This file should not be edited
|
|
6
6
|
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
|
package/package.json
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
approveBuilds: true
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Benchmark: Claude Code vs Forge Workspace
|
|
2
|
+
|
|
3
|
+
Compares a single Claude Code run against a Forge multi-smith workspace on the same task.
|
|
4
|
+
|
|
5
|
+
## Files
|
|
6
|
+
|
|
7
|
+
- `task.md` — task description (given to both harnesses verbatim)
|
|
8
|
+
- `validator.sh` — validates the output (exit 0 = pass)
|
|
9
|
+
- `run.ts` — main runner
|
|
10
|
+
- `results/` — markdown reports (gitignored)
|
|
11
|
+
|
|
12
|
+
## Prerequisites
|
|
13
|
+
|
|
14
|
+
1. **Forge running**: `forge server start` (listening on port 8403)
|
|
15
|
+
2. **Claude Code installed** and authenticated (`claude --version` works)
|
|
16
|
+
3. **harness_test project** exists at `/Users/zliu/IdeaProjects/harness_test` (or at whatever path the `PROJECT` constant in `run.ts` points to — see Tuning)
|
|
17
|
+
|
|
18
|
+
## Run
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pnpm tsx scripts/bench/run.ts
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## What it does
|
|
25
|
+
|
|
26
|
+
1. Prepares `bench/start` branch in harness_test (fresh from main)
|
|
27
|
+
2. **Claude Code run**:
|
|
28
|
+
- Creates `bench/claude-<ts>` branch
|
|
29
|
+
- Runs `claude -p --dangerously-skip-permissions "<task>"` in harness_test
|
|
30
|
+
- Commits output, runs validator
|
|
31
|
+
3. **Forge workspace run**:
|
|
32
|
+
- Creates `bench/forge-<ts>` branch
|
|
33
|
+
- Finds/creates workspace pointing at harness_test
|
|
34
|
+
- Removes existing agents, adds fresh Input → Lead → Engineer → QA
|
|
35
|
+
- Starts daemon, submits task to Input, triggers Lead
|
|
36
|
+
- Polls every 10s until all smiths finish (or 20min timeout)
|
|
37
|
+
- Stops daemon, commits output, runs validator
|
|
38
|
+
4. Writes comparison report to `results/report-<ts>.md`
|
|
39
|
+
|
|
40
|
+
## Validation
|
|
41
|
+
|
|
42
|
+
The validator checks:
|
|
43
|
+
1. `src/utils/text.js` exists with `capitalize` and `reverseWords` exports
|
|
44
|
+
2. `src/utils/text.test.js` exists
|
|
45
|
+
3. `node --test utils/text.test.js` passes (agent's own tests)
|
|
46
|
+
4. External smoke test: independent check that both functions behave correctly (including error cases)
|
|
47
|
+
|
|
48
|
+
## Inspecting Results
|
|
49
|
+
|
|
50
|
+
- Git branches: `git branch | grep bench/` in harness_test
|
|
51
|
+
- Diff: `git diff bench/start...bench/claude-<ts>` (or `forge-<ts>`)
|
|
52
|
+
- Markdown report: `scripts/bench/results/report-<ts>.md`
|
|
53
|
+
|
|
54
|
+
## Tuning
|
|
55
|
+
|
|
56
|
+
Edit `run.ts` constants:
|
|
57
|
+
- `TASK_TIMEOUT_MS` — per-run timeout (default 20 min)
|
|
58
|
+
- `POLL_INTERVAL_MS` — Forge polling frequency (default 10s)
|
|
59
|
+
- `PROJECT` — target project path
|
|
60
|
+
- `FORGE_URL` — Forge API base URL
|
|
61
|
+
|
|
62
|
+
## Notes
|
|
63
|
+
|
|
64
|
+
- The script leaves branches around for inspection — you can diff them manually afterwards
|
|
65
|
+
- Forge workspace agents are configured minimally (role + 2-3 steps each) to keep comparison fair
|
|
66
|
+
- If Forge has auth enabled, the script may need a token — extend `api()` helper to send `X-Forge-Token`
|