@hamp10/agentforge 0.2.21 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agentforge.js +909 -115
- package/package.json +2 -1
- package/scripts/check-task-semantics.js +911 -0
- package/scripts/postinstall.js +20 -5
- package/src/OllamaAgent.js +1178 -246
- package/src/OpenClawCLI.js +5897 -748
- package/src/browser.js +392 -0
- package/src/default-task-guides.js +95 -0
- package/src/resolveOpenclaw.js +38 -7
- package/src/selfUpdate.js +31 -3
- package/src/supervisor.js +88 -20
- package/src/taskSemantics.js +141 -0
- package/src/worker.js +4257 -230
- package/templates/agent/AGENTFORGE.md +151 -53
- package/templates/hooks/agentforge-platform/handler.js +322 -0
- package/src/HampAgentCLI.js +0 -125
- package/src/hampagent/browser.js +0 -321
- package/src/hampagent/runner.js +0 -277
- package/src/hampagent/sessions.js +0 -62
- package/src/hampagent/tools.js +0 -298
|
@@ -0,0 +1,911 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import assert from 'node:assert/strict';
|
|
4
|
+
import { execFileSync } from 'node:child_process';
|
|
5
|
+
import { existsSync, lstatSync, mkdtempSync, mkdirSync, readFileSync, readlinkSync, rmSync, symlinkSync, writeFileSync } from 'node:fs';
|
|
6
|
+
import { tmpdir } from 'node:os';
|
|
7
|
+
import path from 'node:path';
|
|
8
|
+
import { deflateSync } from 'node:zlib';
|
|
9
|
+
import { extractExplicitScope } from '../src/taskSemantics.js';
|
|
10
|
+
import { AgentForgeWorker } from '../src/worker.js';
|
|
11
|
+
import { OpenClawCLI } from '../src/OpenClawCLI.js';
|
|
12
|
+
|
|
13
|
+
// Table-driven expectations for extractExplicitScope. Each entry gives the task
// text, the slugs that must be extracted (order and duplicates are ignored),
// optional tokens that must never appear as slugs, and the expected pageOnly flag.
const cases = [
  {
    text: 'On the example.com listing pages for Alpha and Beta the layout is off',
    slugs: ['alpha', 'beta'],
    absent: ['example-com', 'layout'],
    pageOnly: true,
  },
  {
    text: 'Work on the example.com listing pages for Alpha.ai and Beta.ai. Make only those two listing pages meet the visual and content quality of the rest of the site.',
    slugs: ['alpha-ai', 'beta-ai'],
    absent: ['example-com', 'visual', 'quality'],
    pageOnly: true,
  },
  {
    text: 'Work on the Example.com listing pages for AlphaBoard and BetaMatch. Delete and rebuild those two listing page implementations from a clean start, preserving the same URLs and site conventions. Fix the readability and design issues. Only change those two listing pages.',
    slugs: ['alphaboard', 'betamatch'],
    absent: ['example-com', 'delete', 'rebuild', 'readability', 'design', 'urls', 'site'],
    pageOnly: true,
  },
  {
    text: 'Work on Alpha pages for readability',
    slugs: ['alpha'],
    absent: ['readability'],
    pageOnly: true,
  },
  {
    text: 'Improve pages for Foo and The Bar without touching shared files',
    slugs: ['foo', 'bar'],
    absent: ['shared'],
    pageOnly: true,
  },
  {
    text: 'Improve Alpha.ai pages for readability',
    slugs: ['alpha-ai'],
    absent: ['readability'],
    pageOnly: true,
  },
  {
    text: 'The pages are broken',
    slugs: [],
    pageOnly: false,
  },
  {
    text: 'Compare the listing pages and report issues',
    slugs: [],
    pageOnly: false,
  },
  {
    // A task wrapped in guide and system-context preamble: only the names in the
    // quoted task sentence are expected as slugs.
    text: `[Platform task guide quality gates — these are mandatory for this task type]
UI work should not be hero-only or color-only.

[System context:
- Platform: AgentForge.ai. Dashboard: https://agentforgeai-production.up.railway.app/dashboard.
]

The task is: "There are readability and design issues on the Example.com listing pages for AlphaDeck and BetaGrid. Fix them."`,
    slugs: ['alphadeck', 'betagrid'],
    absent: ['agentforge-ai', 'agentforgeai-production-up-railway', 'hero', 'color', 'readability', 'example-com'],
    pageOnly: true,
  },
];

// Check every table entry against the parser.
for (const { text, slugs: expectedSlugs, absent: forbiddenSlugs, pageOnly: expectedPageOnly } of cases) {
  const scope = extractExplicitScope(text);
  assert.equal(scope.pageOnly, expectedPageOnly, `${text}: pageOnly`);

  // Compare as sorted, de-duplicated lists so ordering and repeats do not matter.
  const actualUnique = [...new Set(scope.slugs)].sort();
  const expectedUnique = [...new Set(expectedSlugs)].sort();
  assert.deepEqual(actualUnique, expectedUnique, `${text}: slugs`);

  (forbiddenSlugs || []).forEach((forbidden) => {
    assert.equal(scope.slugs.includes(forbidden), false, `${text}: unexpected ${forbidden}`);
  });
}
|
|
87
|
+
|
|
88
|
+
/**
 * Run `git` synchronously with the given arguments inside `cwd`.
 *
 * @param {string} cwd - Working directory for the git process.
 * @param {string[]} args - Arguments passed to git.
 * @returns {string} The command's stdout with surrounding whitespace trimmed.
 */
const git = (cwd, args) => {
  const spawnOptions = { cwd, encoding: 'utf-8', stdio: 'pipe' };
  const stdout = execFileSync('git', args, spawnOptions);
  return stdout.trim();
};
|
|
89
|
+
|
|
90
|
+
/**
 * Write a minimal 8-bit truecolour (RGB, non-interlaced) PNG file.
 *
 * The file consists of the PNG signature followed by one IHDR, one IDAT and one
 * IEND chunk. Every chunk carries a real CRC-32 so the output is accepted by
 * decoders that verify chunk checksums.
 *
 * @param {string} filePath - Destination path; the file is written synchronously.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @param {(x: number, y: number) => number[]} sample - Returns the `[r, g, b]`
 *   bytes for the pixel at column `x`, row `y`.
 */
const writeRgbPng = (filePath, width, height, sample) => {
  // Bitwise CRC-32 (reflected polynomial 0xEDB88320), the checksum PNG chunks use.
  const crc32 = (bytes) => {
    let crc = 0xffffffff;
    for (const byte of bytes) {
      crc ^= byte;
      for (let bit = 0; bit < 8; bit += 1) {
        crc = (crc >>> 1) ^ (0xedb88320 & -(crc & 1));
      }
    }
    return (crc ^ 0xffffffff) >>> 0;
  };

  // Each scanline is one filter-type byte (0 = no filter) followed by RGB triples.
  const rows = [];
  for (let y = 0; y < height; y += 1) {
    const row = Buffer.alloc(1 + (width * 3));
    row[0] = 0;
    for (let x = 0; x < width; x += 1) {
      const rgb = sample(x, y);
      const idx = 1 + (x * 3);
      row[idx] = rgb[0];
      row[idx + 1] = rgb[1];
      row[idx + 2] = rgb[2];
    }
    rows.push(row);
  }

  // Chunk layout: 4-byte length, 4-byte type, data, 4-byte CRC.
  const chunk = (type, data = Buffer.alloc(0)) => {
    const out = Buffer.alloc(12 + data.length);
    out.writeUInt32BE(data.length, 0);
    out.write(type, 4, 4, 'ascii');
    data.copy(out, 8);
    // The checksum covers the chunk type and data, but not the length field.
    out.writeUInt32BE(crc32(out.subarray(4, 8 + data.length)), 8 + data.length);
    return out;
  };

  const ihdr = Buffer.alloc(13);
  ihdr.writeUInt32BE(width, 0);
  ihdr.writeUInt32BE(height, 4);
  ihdr[8] = 8; // bit depth
  ihdr[9] = 2; // colour type 2 = truecolour RGB
  ihdr[10] = 0; // compression method
  ihdr[11] = 0; // filter method
  ihdr[12] = 0; // interlace method (none)

  writeFileSync(filePath, Buffer.concat([
    Buffer.from('89504e470d0a1a0a', 'hex'),
    chunk('IHDR', ihdr),
    chunk('IDAT', deflateSync(Buffer.concat(rows))),
    chunk('IEND'),
  ]));
};
|
|
127
|
+
|
|
128
|
+
/**
 * Create a throwaway git repository under the OS temp directory for the UI checks.
 *
 * The repository receives two commits:
 *   1. "baseline" - simple alpha/beta pages in `public_html/domains`, copies of
 *      them under a nested `<repo-basename>/public_html/domains` directory, and a
 *      generated `public_html/domains/css/alpha.css`.
 *   2. "shallow styled pages" - the alpha/beta pages rewritten as a hero-only
 *      section with generated inline CSS and an inline `style` attribute.
 *
 * If any step fails the temporary directory is removed before the error is
 * rethrown, because callers can only clean up a fixture that was returned.
 *
 * @returns {{ repo: string, head: string, legacyExternalCss: string }} The repo
 *   path, the hash of the baseline commit, and the CSS text written to alpha.css.
 */
const makeShallowUiFixture = () => {
  const repo = mkdtempSync(path.join(tmpdir(), 'agentforge-ui-quality-'));
  try {
    const domainsDir = path.join(repo, 'public_html', 'domains');
    const targetCssDir = path.join(domainsDir, 'css');
    const nestedDomainsDir = path.join(repo, path.basename(repo), 'public_html', 'domains');
    mkdirSync(domainsDir, { recursive: true });
    mkdirSync(targetCssDir, { recursive: true });
    mkdirSync(nestedDomainsDir, { recursive: true });
    const legacyExternalCss = Array.from({ length: 30 }, (_, i) =>
      `.legacy-alpha-shell-${i} { background: #${String(220000 + i).slice(0, 6)}; border-radius: ${i + 2}px; padding: ${i + 12}px; color: #102030; }`
    ).join('\n');
    writeFileSync(path.join(targetCssDir, 'alpha.css'), legacyExternalCss);
    for (const name of ['alpha', 'beta']) {
      const baselineHtml = [
        '<!doctype html>',
        '<html>',
        '<head>',
        '<!-- Google tag (gtag.js) -->',
        '<style>',
        '/* Page-specific styles */',
        '.page { color: #111; }',
        '</style>',
        '</head>',
        '<body>',
        `<main><section><h1>${name}</h1><p>Existing page content with enough structure to be a baseline.</p></section></main>`,
        '</body>',
        '</html>',
      ].join('\n');
      writeFileSync(path.join(domainsDir, `${name}.html`), baselineHtml);
      writeFileSync(path.join(nestedDomainsDir, `${name}.html`), baselineHtml);
    }
    git(repo, ['init']);
    git(repo, ['config', 'user.email', 'test@example.com']);
    git(repo, ['config', 'user.name', 'AgentForge Test']);
    // Keep the fixture commits independent of a global "sign every commit" setting.
    git(repo, ['config', 'commit.gpgsign', 'false']);
    git(repo, ['add', '.']);
    git(repo, ['commit', '-m', 'baseline']);
    const head = git(repo, ['rev-parse', 'HEAD']);

    const shallowCss = Array.from({ length: 28 }, (_, i) =>
      `.hero-polish-${i} { color: #${String(100000 + i).slice(0, 6)}; padding: ${i + 1}px; margin: ${i}px; box-shadow: 0 0 ${i + 2}px rgba(0,0,0,.1); }`
    ).join('\n') + '\n.list-heading { font-size: 1.5rem; /* Standardizing h3 size */ }';
    for (const name of ['alpha', 'beta']) {
      writeFileSync(path.join(domainsDir, `${name}.html`), [
        '<!doctype html>',
        '<html>',
        `<head><style>${shallowCss}</style></head>`,
        '<body>',
        '<main>',
        `<section class="property-hero hero-polish" style="padding: 6rem; color: white;">`,
        `<div class="hero-badge">${name} category</div>`,
        `<h1 class="property-title">${name}</h1>`,
        '<div class="hero-stats">',
        '<span class="hero-stat">AI</span>',
        '<span class="hero-stat">Premium</span>',
        '<span class="hero-stat">Ready</span>',
        '</div>',
        '</section>',
        '</main>',
        '</body>',
        '</html>',
      ].join('\n'));
    }
    git(repo, ['add', '.']);
    git(repo, ['commit', '-m', 'shallow styled pages']);
    return { repo, head, legacyExternalCss };
  } catch (error) {
    // Nothing was handed back to the caller, so remove the half-built repo here.
    rmSync(repo, { recursive: true, force: true });
    throw error;
  }
};
|
|
194
|
+
|
|
195
|
+
// End-to-end checks against the throwaway fixture repository. The statements are
// order-dependent: several of them rewrite fixture files and restore them again.
const fixture = makeShallowUiFixture();
try {
  // Bare instances built from the prototypes, so no constructor code runs.
  const worker = Object.create(AgentForgeWorker.prototype);
  worker.cli = { isAnthropicApiKey: (key) => /^sk-ant-/i.test(String(key || '')) };
  const cli = Object.create(OpenClawCLI.prototype);

  // ---- Project discovery and workspace links ----
  const projectsRoot = mkdtempSync(path.join(tmpdir(), 'agentforge-project-list-'));
  let agentWorkspace = null;
  try {
    const targetProject = path.join(projectsRoot, 'Hamp.com');
    const appProject = path.join(projectsRoot, 'Tiny App');
    const staleStaticRoot = path.join(projectsRoot, 'public_html');
    const logsDir = path.join(projectsRoot, 'logs');
    agentWorkspace = mkdtempSync(path.join(tmpdir(), 'agentforge-agent-workspace-'));
    mkdirSync(path.join(targetProject, '.git'), { recursive: true });
    for (const dir of [appProject, staleStaticRoot, logsDir]) {
      mkdirSync(dir, { recursive: true });
    }
    writeFileSync(path.join(appProject, 'package.json'), '{"scripts":{"start":"vite"}}');
    writeFileSync(path.join(staleStaticRoot, 'index.html'), '<main>stale deploy copy</main>');
    const staleWorkspaceLink = path.join(agentWorkspace, 'public_html');
    symlinkSync(staleStaticRoot, staleWorkspaceLink, 'dir');

    const discoveredProjects = worker._listProjectDirectoryEntries(projectsRoot);
    assert.deepEqual(
      discoveredProjects,
      ['Hamp.com', 'Tiny App'],
      'project discovery should exclude generic source/deploy folders unless they are standalone projects'
    );
    const mentionedProjects = worker._findMentionedProjects(
      projectsRoot,
      'Work on the Hamp.com listing pages for Alpha and Beta'
    );
    assert.deepEqual(
      mentionedProjects,
      ['Hamp.com'],
      'task project matching should ignore stale sibling public_html folders'
    );

    worker._syncProjectWorkspaceLinks(agentWorkspace, projectsRoot, {
      resolvedTaskProject: { name: 'Hamp.com', path: targetProject },
    });
    const projectLink = path.join(agentWorkspace, 'Hamp.com');
    assert.equal(lstatSync(projectLink).isSymbolicLink(), true);
    assert.equal(readlinkSync(projectLink), targetProject);
    assert.equal(
      existsSync(staleWorkspaceLink),
      false,
      'scoped project workspaces should remove stale generic sibling symlinks'
    );
    const projectManifest = readFileSync(path.join(agentWorkspace, 'PROJECTS.md'), 'utf-8');
    assert.doesNotMatch(
      projectManifest,
      /public_html/i,
      'scoped project manifest should not advertise sibling deploy/source roots'
    );
  } finally {
    rmSync(projectsRoot, { recursive: true, force: true });
    if (agentWorkspace) rmSync(agentWorkspace, { recursive: true, force: true });
  }

  // ---- Model and provider resolution ----
  const visionPlan = worker._resolveVisualAnalysisModels({
    taskModel: 'google/gemini-2.5-flash',
    imageModel: 'openai/gpt-4.1',
    fallbackVisionModels: ['google/gemini-2.5-pro'],
    flowConfig: {},
    providerKeys: { google: 'google-key', openai: 'openai-key' },
  });
  assert.equal(
    visionPlan.primary,
    'google/gemini-2.5-flash',
    'visual analysis should prefer the routed task model over a generic configured vision model when both can see images'
  );
  assert.deepEqual(
    visionPlan.fallbacks,
    ['openai/gpt-4.1', 'google/gemini-2.5-pro'],
    'visual analysis should keep configured vision models as fallback candidates'
  );
  const disallowedModel = worker._resolveRunnableConfiguredModel(
    'openai/gpt-4.1',
    { openai: 'openai-key' },
    new Set(),
    { guardrails: { allowed_providers: ['google'] } }
  );
  assert.match(
    disallowedModel.error,
    /not allowed by this flow/i,
    'flow allowed-provider guardrails should block disallowed configured models'
  );
  const sessionTokenAccepted = worker._modelProviderAvailable(
    'anthropic/claude-sonnet-4-6',
    { anthropic: 'claude-oauth-token' }
  );
  assert.equal(
    sessionTokenAccepted,
    false,
    'anthropic provider availability must require a Console API key, not a claude.ai session token'
  );
  const consoleKeyAccepted = worker._modelProviderAvailable(
    'anthropic/claude-sonnet-4-6',
    { anthropic: 'sk-ant-test' }
  );
  assert.equal(
    consoleKeyAccepted,
    true,
    'anthropic provider availability should accept real Console API keys'
  );
  const unavailableVisionModels = worker._unavailableConfiguredVisionAnalysisModels({
    imageModel: 'openai/gpt-4.1',
    fallbackVisionModels: ['google/gemini-2.5-pro'],
    flowConfig: {},
    providerKeys: { google: 'google-key' },
  });
  assert.deepEqual(
    unavailableVisionModels,
    ['openai/gpt-4.1'],
    'configured vision-analysis models without runnable provider credentials should be reported instead of silently dropped'
  );
  await assert.rejects(
    () => worker._resolveFlowRoute('Route this implementation task.', [], {
      router_model: 'openai/gpt-4.1',
      routes: [{ condition: 'intent_code', model: 'google/gemini-2.5-flash' }],
      guardrails: { allowed_providers: ['openai', 'google'] },
    }, { google: 'google-key' }),
    /Configured flow router model cannot run/i,
    'configured router/classifier models should fail clearly when their provider is unavailable'
  );

  // ---- Nudges produced for the shallow UI diff ----
  const message = 'Improve the listing pages for Alpha and Beta so they feel polished.';
  const baseline = [{ root: fixture.repo, head: fixture.head }];
  const targetReminder = worker._formatScopedUiTargetSetReminder(message);
  assert.match(
    targetReminder,
    /Requested scoped UI targets: alpha, beta/i,
    'multi-target UI quality retries should preserve the full scoped target set'
  );
  const targetSetNudge = worker._buildScopedUiTargetSetNudge(baseline, message);
  assert.match(
    targetSetNudge,
    /one delivery set/i,
    'scoped UI target nudge should tell retries to handle all named targets together'
  );
  const formattedArtifactNudge = worker._formatUiImplementationArtifactNudge([
    { repo: fixture.repo, path: 'public_html/domains/alpha.html', issue: 'inline source comment', source: 'working tree', line: '/* improved contrast */' },
  ]);
  assert.match(
    formattedArtifactNudge,
    /not sufficient/i,
    'patch-artifact feedback should not imply source cleanup alone completes UI quality work'
  );
  const shallowSurfaceNudge = worker._buildShallowUiSurfaceNudge(baseline, message);
  assert.match(
    shallowSurfaceNudge,
    /dominated by hero, CTA, typography, color, or spacing tweaks/i,
    'shallow hero/inline-style UI diff should be rejected'
  );
  const artifactNudge = worker._buildUiImplementationArtifactNudge(baseline, message);
  assert.match(
    artifactNudge,
    /inline style attribute/i,
    'new inline style attributes should be treated as UI patch artifacts'
  );
  assert.match(
    artifactNudge,
    /Page-local <style> blocks or page-owned scoped stylesheets are acceptable/i,
    'UI artifact nudge should not make agents infer that scoped page-local CSS is forbidden'
  );
  assert.doesNotMatch(
    artifactNudge,
    /Standardizing h3 size/i,
    'neutral structural UI comments should not distract repair loops as patch artifacts'
  );

  // ---- Patch-artifact detection on temporary edits of alpha.html ----
  // Each variant is written, checked, and then the committed content is restored.
  const alphaPage = path.join(fixture.repo, 'public_html', 'domains', 'alpha.html');
  const committedAlphaHtml = readFileSync(alphaPage, 'utf-8');
  const artifactLineMatches = (pattern) =>
    worker._findUiImplementationArtifactChanges(baseline, message).some((change) => pattern.test(change.line));

  writeFileSync(
    alphaPage,
    committedAlphaHtml.replace('<head><style>', '<head>\n<!-- Google tag (gtag.js) -->\n<style>')
  );
  assert.equal(
    artifactLineMatches(/Google tag/i),
    false,
    'full-run patch artifact validation should not flag baseline comments that survive or are reintroduced by a rewrite'
  );
  writeFileSync(alphaPage, committedAlphaHtml);

  const metaUrlHtml = committedAlphaHtml.replace(
    '</head>',
    '<meta property="og:url" content="https://example.com/domains/alpha">\n</head>'
  );
  writeFileSync(alphaPage, metaUrlHtml);
  assert.equal(
    artifactLineMatches(/example\.com\/domains\/alpha/i),
    false,
    'patch artifact validation should not treat https:// URLs in added markup as source comments'
  );
  writeFileSync(alphaPage, committedAlphaHtml);

  const patchNarrationCommentHtml = committedAlphaHtml.replace(
    '</style>',
    '/* Fix clipped heading issue from visual warning */\n</style>'
  );
  writeFileSync(alphaPage, patchNarrationCommentHtml);
  assert.equal(
    artifactLineMatches(/Fix clipped heading issue/i),
    true,
    'patch artifact validation should still reject comments that narrate the repair rationale'
  );
  writeFileSync(alphaPage, committedAlphaHtml);

  // ---- Scope drift: page-owned versus shared stylesheets ----
  const cssDir = path.join(fixture.repo, 'public_html', 'css');
  mkdirSync(cssDir, { recursive: true });
  const scopedCssFile = path.join(cssDir, 'alpha.css');
  writeFileSync(scopedCssFile, '.alpha-page { color: #111; }');
  const scopedDrift = worker._findScopeDriftRepoChanges(baseline, message);
  assert.equal(
    scopedDrift.length,
    0,
    'page-owned scoped stylesheet files should not be restored as out-of-scope work'
  );
  rmSync(scopedCssFile, { force: true });
  const sharedCssFile = path.join(cssDir, 'domain-detail.css');
  writeFileSync(sharedCssFile, '.shared-detail { color: #111; }');
  const sharedDrift = worker._findScopeDriftRepoChanges(baseline, message);
  assert.match(
    worker._formatScopeDriftNudge(sharedDrift),
    /domain-detail\.css/i,
    'shared stylesheet files should still be reported as out-of-scope for scoped page work'
  );
  rmSync(sharedCssFile, { force: true });

  // ---- Direct UI content validation for alpha.html ----
  // Builds the thunk that assert.throws / assert.doesNotThrow will invoke.
  const validateAlphaPage = (content, oldContent) => () =>
    cli._validateDirectUiFileContent(alphaPage, content, { task: message, oldContent });

  const directOldHtml = '<!doctype html><html><body><main><section class="hero"><h1>Alpha</h1><p>Existing body copy.</p></section></main></body></html>';
  const directInlineStyleHtml = '<!doctype html><html><body><main><section class="hero" style="padding: 4rem; color: white;"><h1>Alpha</h1><p>Existing body copy.</p></section></main></body></html>';
  assert.throws(
    validateAlphaPage(directInlineStyleHtml, directOldHtml),
    /new inline style attribute/i,
    'direct UI edits should reject newly introduced inline style attributes before full-run verification'
  );
  assert.throws(
    validateAlphaPage(directInlineStyleHtml, directOldHtml),
    /page-local <style> blocks or page-owned scoped stylesheets/i,
    'direct UI validation should steer agents toward scoped CSS instead of no styling'
  );
  assert.doesNotThrow(
    validateAlphaPage(directInlineStyleHtml, directInlineStyleHtml),
    'direct UI validation should not block preexisting unchanged inline style attributes'
  );
  const directBaselineCommentHtml = '<!doctype html><html><head><!-- Google tag (gtag.js) --><style>/* Page-specific styles */ .page { color: #111; }</style></head><body><main><section><h1>Alpha</h1><p>Existing page content with enough structure to be a baseline.</p></section></main></body></html>';
  const directReintroducedBaselineCommentHtml = '<!doctype html><html><head><!-- Google tag (gtag.js) --><style>/* Page-specific styles */ .hero { padding: 4rem; }</style></head><body><main><section class="hero"><h1>Alpha</h1><p>Existing body copy.</p></section></main></body></html>';
  assert.doesNotThrow(
    validateAlphaPage(directReintroducedBaselineCommentHtml, directBaselineCommentHtml),
    'direct UI validation should allow tracked baseline comments to be reintroduced after whole-file rewrites'
  );
  const directSoftCommentHtml = '<!doctype html><html><body><style>.hero { border-top: 2px solid rgba(16, 185, 129, 0.5); /* Softened border */ }</style><main><section class="hero"><h1>Alpha</h1><p>Existing body copy.</p></section></main></body></html>';
  assert.doesNotThrow(
    validateAlphaPage(directSoftCommentHtml, directOldHtml),
    'direct UI validation should allow neutral structural source comments instead of forcing agents into comment cleanup loops'
  );
  const directPatchNarrationCommentHtml = '<!doctype html><html><body><style>.hero { border-top: 2px solid rgba(16, 185, 129, 0.5); /* Fix clipped hero issue from visual warning */ }</style><main><section class="hero"><h1>Alpha</h1><p>Existing body copy.</p></section></main></body></html>';
  assert.throws(
    validateAlphaPage(directPatchNarrationCommentHtml, directOldHtml),
    /new UI source comment|Fix clipped hero issue/i,
    'direct UI edits should still reject source comments that narrate the patch rationale'
  );
  const directLicenseCommentHtml = '<!doctype html><html><body><style>/*! @license MIT */ .hero { padding: 4rem; }</style><main><section class="hero"><h1>Alpha</h1><p>Existing body copy.</p></section></main></body></html>';
  assert.doesNotThrow(
    validateAlphaPage(directLicenseCommentHtml, directOldHtml),
    'direct UI validation should allow standard tooling/license source comments'
  );

  // ---- write_file must never truncate an existing page ----
  const attemptDirectWrite = (toolArgs) => () =>
    cli._runDirectTool('test-agent', 'write_file', toolArgs, fixture.repo, { task: message });
  const protectedReplacementOriginal = readFileSync(alphaPage, 'utf-8');
  await assert.rejects(
    attemptDirectWrite({ path: alphaPage }),
    /requires a string content payload/i,
    'direct write_file should reject missing content instead of truncating the target'
  );
  assert.equal(
    readFileSync(alphaPage, 'utf-8'),
    protectedReplacementOriginal,
    'missing-content write_file rejection should leave the existing file unchanged'
  );
  await assert.rejects(
    attemptDirectWrite({ path: alphaPage, content: '' }),
    /replace existing non-empty file with empty content/i,
    'direct write_file should reject empty replacement content for existing non-empty files'
  );
  assert.equal(
    readFileSync(alphaPage, 'utf-8'),
    protectedReplacementOriginal,
    'empty replacement rejection should leave the existing file unchanged'
  );

  // ---- Write-path guards ----
  const repoBasename = path.basename(fixture.repo);
  assert.throws(
    () => cli._guardDirectFileWritePath(
      path.join(fixture.repo, repoBasename, 'public_html', 'domains', 'alpha.html'),
      fixture.repo,
      { task: message }
    ),
    /nested duplicate of the current project/i,
    'tracked nested duplicate page should be blocked when canonical scoped page exists'
  );
  assert.throws(
    () => cli._guardDirectFileWritePath(
      path.join(fixture.repo, 'public_html', 'alpha.html'),
      fixture.repo,
      { task: 'Work on the listing page for Alpha. Only change that listing page.' }
    ),
    /existing scoped page target/i,
    'scoped page work should not create a new same-slug page when an existing scoped target page is present'
  );

  // ---- Clean-start rebuilds must not recycle old CSS ----
  const cleanStartTask = 'Delete and rebuild the listing pages for Alpha and Beta from a clean start. Only change those listing pages.';
  const alphaOwnedCss = path.join(fixture.repo, 'public_html', 'domains', 'css', 'alpha.css');
  const validateCleanStartCss = (css) => () =>
    cli._validateDirectUiFileContent(alphaOwnedCss, css, { task: cleanStartTask });

  const extractedOldCss = Array.from({ length: 28 }, (_, i) =>
    `.hero-polish-${i} { color: #${String(100000 + i).slice(0, 6)}; padding: ${i + 1}px; margin: ${i}px; box-shadow: 0 0 ${i + 2}px rgba(0,0,0,.1); }`
  ).join('\n');
  assert.throws(
    validateCleanStartCss(extractedOldCss),
    /move the old target-page CSS into a new page-owned stylesheet/i,
    'clean-start scoped page work should reject extracting old target-page CSS into a new stylesheet'
  );
  assert.throws(
    validateCleanStartCss(fixture.legacyExternalCss),
    /reuse the old page-owned stylesheet/i,
    'clean-start scoped page work should reject reusing an existing page-owned stylesheet unchanged'
  );
  const freshPageCss = [
    '.alpha-shell { min-height: 100vh; background: #f8f6ef; color: #181818; }',
    '.alpha-wrap { max-width: 1180px; margin: 0 auto; padding: 84px 28px; }',
    '.alpha-kicker { font: 600 13px/1.2 Inter, sans-serif; letter-spacing: .12em; text-transform: uppercase; }',
    '.alpha-title { font: 700 clamp(48px, 8vw, 108px)/.92 Georgia, serif; color: #191714; }',
    '.alpha-summary { max-width: 720px; font-size: 22px; line-height: 1.55; color: #34312c; }',
    '.alpha-panel { border: 1px solid #d8d0bf; background: #fffaf0; padding: 32px; }',
    '.alpha-grid { display: grid; grid-template-columns: 1.2fr .8fr; gap: 32px; }',
    '.alpha-stat { display: flex; justify-content: space-between; border-top: 1px solid #ded6c8; padding: 18px 0; }',
    '.alpha-list { display: grid; gap: 18px; padding: 0; list-style: none; }',
    '.alpha-list li { padding: 20px; background: #ffffff; border: 1px solid #e7dfd0; }',
    '.alpha-cta { display: inline-flex; align-items: center; padding: 16px 22px; background: #191714; color: #fff; }',
  ].join('\n');
  assert.doesNotThrow(
    validateCleanStartCss(freshPageCss),
    'clean-start scoped page work should allow materially fresh page-owned CSS'
  );

  // ---- Shell command guards ----
  const guardBash = (command) => () =>
    cli._guardDirectBashCommand(command, fixture.repo, { task: message });
  assert.throws(
    guardBash(`rm ${repoBasename}/public_html/domains/alpha.html`),
    /nested duplicate of the current project/i,
    'scoped shell rm should reject nested duplicate project paths'
  );
  assert.throws(
    guardBash(`cat > ${repoBasename}/public_html/domains/alpha.html <<'EOF'\n<section>Alpha</section>\nEOF`),
    /nested duplicate of the current project/i,
    'scoped shell redirection should reject nested duplicate project paths'
  );
  assert.doesNotThrow(
    guardBash(`cat > public_html/domains/alpha.html <<'EOF'\n<section>Alpha</section>\nEOF`),
    'scoped shell redirection should allow canonical target page writes'
  );
  assert.doesNotThrow(
    guardBash('touch public_html/css/alpha.css'),
    'scoped shell touch should allow page-owned CSS files'
  );
  assert.doesNotThrow(
    guardBash("sed -i '' 's/Existing/Updated/g' public_html/domains/alpha.html"),
    'scoped shell sed should ignore the substitution script and allow canonical target page edits'
  );
  assert.throws(
    guardBash('touch public_html/css/domain-detail.css'),
    /outside the requested task scope/i,
    'scoped shell touch should reject shared CSS files outside the target names'
  );

  // ---- Screenshot discontinuity scan on synthetic images ----
  // A lighter rectangle in the lower-left quadrant of an otherwise flat image.
  const seamPng = path.join(fixture.repo, 'dominant-seam.png');
  writeRgbPng(seamPng, 900, 600, (x, y) => {
    const insideBlock = x < 450 && y > 300;
    return insideBlock ? [74, 92, 120] : [26, 36, 50];
  });
  const seamFindings = cli._detectScreenshotVisualDiscontinuities(seamPng).join('\n');
  assert.match(
    seamFindings,
    /hard-edged/i,
    'screenshot discontinuity scan should catch large hard-edged rectangular seams'
  );
  // A slightly lighter block covering only part of the surface.
  const subtlePartialSurfacePng = path.join(fixture.repo, 'subtle-partial-surface.png');
  writeRgbPng(subtlePartialSurfacePng, 1000, 700, (x, y) => (
    x < 560 && y >= 120 && y < 520
      ? [34, 43, 59]
      : [27, 36, 52]
  ));
  const subtleFindings = cli._detectScreenshotVisualDiscontinuities(subtlePartialSurfacePng).join('\n');
  assert.match(
    subtleFindings,
    /partial-surface hard-edged background\/tint block edge/i,
    'screenshot discontinuity scan should catch large subtle partial-surface tint blocks'
  );
} finally {
  rmSync(fixture.repo, { recursive: true, force: true });
}
|
|
623
|
+
|
|
624
|
+
// ---------------------------------------------------------------------------
// Source-level guard checks.
//
// Everything below greps the shipped source text for markers that earlier
// fixes introduced, so a refactor that silently drops one of them fails here.
// ---------------------------------------------------------------------------

// Package-local sources: they sit next to this script, so a read failure is a
// genuine error and is allowed to propagate.
const openClawSource = readFileSync(new URL('../src/OpenClawCLI.js', import.meta.url), 'utf-8');
const workerSource = readFileSync(new URL('../src/worker.js', import.meta.url), 'utf-8');
const workerBinSource = readFileSync(new URL('../bin/agentforge.js', import.meta.url), 'utf-8');
const defaultGuidesSource = readFileSync(new URL('../src/default-task-guides.js', import.meta.url), 'utf-8');

/**
 * Reads a source file that lives above the package root.
 *
 * The two callers below climb three directories up from this script, i.e.
 * outside the package directory (presumably into a surrounding repository
 * checkout — confirm against the repo layout). Such files are absent when the
 * package is installed on its own, so a missing file is reported and tolerated
 * instead of aborting the whole script with a raw ENOENT.
 *
 * @param {string} relativePath - Path relative to this script's URL.
 * @returns {string | null} The file contents, or `null` when the file does not exist.
 * @throws {Error} Any read failure other than ENOENT is rethrown unchanged.
 */
const readSourceOutsidePackage = (relativePath) => {
  try {
    return readFileSync(new URL(relativePath, import.meta.url), 'utf-8');
  } catch (err) {
    if (err?.code !== 'ENOENT') throw err;
    console.warn(`check-task-semantics: ${relativePath} not found; skipping the checks that depend on it`);
    return null;
  }
};

const serverSource = readSourceOutsidePackage('../../../src/web-server-auth.js');
const dashboardSource = readSourceOutsidePackage('../../../public/dashboard.js');

// OpenClaw CLI: visual QA prompts, screenshot scanning and model routing.
for (const [pattern, failureMessage] of [
  [
    /unintended visual artifacts, broken layering, abrupt discontinuities/i,
    'image visual analysis should require broad visual QA reasoning',
  ],
  [
    /_detectScreenshotVisualDiscontinuities/i,
    'browser verification should include generic screenshot discontinuity detection',
  ],
  [
    /isScopedBrowserTarget && taskNeedsVisualAnalysis && \(!options\?\.afterMutation \|\| isLocalVerificationUrl\)/,
    'scoped target screenshots should be scanned before edits as well as after local edits',
  ],
  [
    /large hard-edged rectangular background\/overlay boundary across a dominant UI surface/i,
    'screenshot discontinuity detection should fail large hard-edged visual breaks',
  ],
  [
    /partial-surface hard-edged background\/tint block edge/i,
    'screenshot discontinuity detection should fail subtle partial-surface tint blocks',
  ],
  [
    /clippedTextBlocks/i,
    'browser verification should include deterministic clipped-text detection',
  ],
  [
    /clipped or partially cut-off text\/letterforms/i,
    'image visual analysis should explicitly inspect cut-off text and letterforms',
  ],
  [
    /Scrutinize large graphic\/background surfaces/i,
    'image visual analysis should explicitly scrutinize broad rendered surfaces for accidental artifacts',
  ],
  [
    /Treat low-contrast text anywhere.*even when the text is large/i,
    'image visual analysis should not waive readability problems because text is large',
  ],
  [
    /_markVisionModelTemporarilyUnavailable/i,
    'vision verifier should avoid retrying temporarily unavailable image models on every screenshot',
  ],
  [
    /isHttpUiReferenceUrl/i,
    'comparable UI visual context should allow same-site non-target reference pages, not only localhost',
  ],
  [
    /scopeViolationThisBatch \|\| comparableContextBlockThisBatch\) break/i,
    'direct tool batches should stop after a scope/comparable-context block instead of spamming blocked writes',
  ],
  [
    /imageGenerationModel/i,
    'image generation model must be separate from visual-analysis model routing',
  ],
  [
    /generationModelForHook/i,
    '.image-model hook should only receive true image-generation models',
  ],
]) {
  assert.match(openClawSource, pattern, failureMessage);
}

// Worker: model resolution, provider wiring and retry budgets. The first tuple
// element names the assert method so forbidden patterns keep their position.
for (const [assertion, pattern, failureMessage] of [
  [
    'match',
    /_resolveVisualAnalysisModels/i,
    'worker should resolve visual-analysis candidates after flow task routing',
  ],
  [
    'match',
    /_unavailableConfiguredVisionAnalysisModels/i,
    'worker should fail clearly when configured vision-analysis models are unavailable for visual tasks',
  ],
  [
    'match',
    /The task model is first when it can inspect images/i,
    'visual analysis should prefer the routed task model when it supports images',
  ],
  [
    'match',
    /Configured flow router model cannot run/i,
    'configured router/classifier models should be treated as required flow wiring',
  ],
  [
    'match',
    /provider === 'anthropic'[\s\S]*isAnthropicApiKey/i,
    'anthropic model availability should reject claude.ai OAuth/session tokens',
  ],
  [
    'match',
    /_hydrateProviderKeysFromEnv/i,
    'worker should hydrate configured-provider keys from worker env before model routing',
  ],
  [
    'match',
    /configuredProvider === 'openrouter'/i,
    'configured router/classifier models should support OpenRouter, not just first-party providers',
  ],
  [
    'match',
    /_modelProviderAllowed/i,
    'flow allowed-provider guardrails should be enforced by worker routing',
  ],
  [
    'doesNotMatch',
    /nudgeCount\s*<=\s*MAX_NUDGES|nudgeCount\s*>\s*MAX_NUDGES|uiVerificationRetryCount\s*>\s*MAX_NUDGES|uiRepairNudgeCount\s*>\s*UI_REPAIR_NUDGE_LIMIT/i,
    'retry counters should stop before starting another pass at the displayed limit',
  ],
  [
    'doesNotMatch',
    /UI_QUALITY_NUDGE_LIMIT\s*=\s*isVisualUiTask\s*\?\s*Math\.min\(MAX_NUDGES,\s*3\)/,
    'visual UI quality repair loops should use the UI repair budget, not the smaller generic nudge limit',
  ],
  [
    'match',
    /UI_REPAIR_NUDGE_LIMIT[\s\S]*consumeUiRepairNudge[\s\S]*reached \$\{UI_REPAIR_NUDGE_LIMIT\} total repair retries/i,
    'visual UI retries need a global repair budget across rejection types',
  ],
  [
    'match',
    /AGENTFORGE_UI_REPAIR_BUDGET \|\| '12'/,
    'visual UI tasks should get enough default repair budget to keep iterating on design issues',
  ],
  [
    'match',
    /uiVerificationRetryCount\s*>=\s*UI_REPAIR_NUDGE_LIMIT[\s\S]*UI task visual verification still reported visible issues — retrying \(\$\{uiVerificationRetryCount\}\/\$\{UI_REPAIR_NUDGE_LIMIT\}/,
    'visual verification failures should use the full UI repair budget, not the generic nudge limit',
  ],
  [
    'match',
    /preRunGuide[\s\S]*uiRepairNudgeCount\s*=\s*0[\s\S]*uiVerificationRetryCount\s*=\s*0[\s\S]*postRunGuide[\s\S]*uiRepairNudgeCount\s*=\s*0[\s\S]*uiVerificationRetryCount\s*=\s*0/i,
    'live guide notes should get a fresh UI repair budget instead of inheriting exhausted retries',
  ],
  [
    'match',
    /If retry text conflicts with these guide notes, the guide notes win/i,
    'live guide notes should override conflicting retry boilerplate',
  ],
  [
    'match',
    /_restoreFailedUiTaskRepoChanges[\s\S]*Restored rejected scoped UI changes after task failure/i,
    'failed scoped UI-quality runs should restore rejected partial work before the next agent attempt',
  ],
]) {
  assert[assertion](workerSource, pattern, failureMessage);
}

// Web server: agent deletion paths. Skipped when the file is not available.
if (serverSource !== null) {
  for (const [pattern, failureMessage] of [
    [
      /function cancelActiveAgentTasks[\s\S]*task_cancel[\s\S]*clearActiveTask/i,
      'destructive agent deletion paths should cancel active worker tasks before removing sessions',
    ],
    [
      /app\.delete\('\/api\/agents\/delete\/all'[\s\S]*cancelActiveAgentTasks[\s\S]*type: 'agents_pruned'/i,
      'delete-all should cancel active tasks and broadcast removed agents to every connected tab',
    ],
    [
      /app\.delete\('\/api\/agents\/delete\/cleanup-empty'[\s\S]*activeTasks[\s\S]*type: 'agents_pruned'/i,
      'empty-agent cleanup should preserve live tasks and broadcast removed agents',
    ],
  ]) {
    assert.match(serverSource, pattern, failureMessage);
  }
}

// Dashboard: delete confirmation and Guide message ordering. Skipped when the
// file is not available.
if (dashboardSource !== null) {
  for (const [pattern, failureMessage] of [
    [
      /confirmDeleteAgent[\s\S]*await fetch[\s\S]*removeAgentFromLocalState/i,
      'single-agent delete should only remove local UI after the server confirms deletion',
    ],
    [
      /function finalizeLiveFeedBeforeGuide[\s\S]*commitChunkBuffer[\s\S]*hideTypingIndicator/i,
      'live Guide messages should close the current visible agent feed before rendering the user bubble',
    ],
    [
      /async function sendGuideMessage[\s\S]*agent\._guideInflight[\s\S]*finalizeLiveFeedBeforeGuide\(agentId\)[\s\S]*addMessage/i,
      'locally sent Guide messages should appear after the latest agent thought, not above the live feed',
    ],
    [
      /if \(isGuide\) finalizeLiveFeedBeforeGuide\(agentId\);[\s\S]*addMessage/i,
      'Guide messages received from another tab should use the same conversation ordering as local Guide messages',
    ],
  ]) {
    assert.match(dashboardSource, pattern, failureMessage);
  }
}
|
|
815
|
+
// OpenClaw CLI, second pass: direct model path, comparable-UI context and
// contrast thresholds. Each row is [assert method, pattern, failure message];
// rows run in order and the first failing row throws.
for (const [assertion, pattern, failureMessage] of [
  [
    'match',
    /provider === 'openai' \|\| provider === 'openrouter'/i,
    'direct model path should understand OpenRouter as OpenAI-compatible',
  ],
  [
    'match',
    /requiresComparableVisualContextForMutation/i,
    'scoped UI work should require visual reference context when local peer pages can be opened',
  ],
  [
    'match',
    /_directUiContextByTask/i,
    'direct UI peer context should persist across native retries for the same task',
  ],
  [
    'match',
    /taskId,\n\s*iteration,/i,
    'direct fast path should receive taskId so UI peer context is task-scoped',
  ],
  [
    'match',
    /persistDirectUiContext\(\);/i,
    'direct UI peer context should be saved when agents inspect source, screenshots, or scoped URLs',
  ],
  [
    'match',
    /const visualWarnings = \[\];[\s\S]*visualWarnings\.push\(`\$\{slug\}: \$\{visualWarning\}`\);[\s\S]*continue;[\s\S]*lastDirectVisualWarning = visualWarnings\.join\('\\n'\);/i,
    'multi-target scoped verification should inspect every missing target before returning visual warnings',
  ],
  [
    'match',
    /open at least one nearby peer page from the same app\/site and inspect it visually/i,
    'comparable UI context guidance should require visual inspection, not source-only matching',
  ],
  [
    // Forbidden: a gate that only fires on the first direct iteration.
    'doesNotMatch',
    /taskRequiresVisualVerification\s*&&\s*directIteration\s*<=\s*1\s*&&\s*explicitScopeForTask\.pageOnly/s,
    'comparable UI context gate should apply on retries, not only first iteration',
  ],
  [
    'match',
    /const minRatio = isLargeText \? 3 : 4\.5;/,
    'control/link contrast checks should use text-size-aware WCAG thresholds',
  ],
]) {
  assert[assertion](openClawSource, pattern, failureMessage);
}
|
|
860
|
+
// Default task guides: the UI guide text must keep both artifact-related
// instructions. Pairs are [pattern, failure message], checked in order.
for (const [pattern, failureMessage] of [
  [
    /anything that reads as rendering damage rather than deliberate design/i,
    'default UI guide should require broad screenshot artifact critique',
  ],
  [
    /source-artifact feedback appears, clean it up as part of the next pass/i,
    'default UI guide should tell agents not to confuse artifact cleanup with the design fix',
  ],
]) {
  assert.match(defaultGuidesSource, pattern, failureMessage);
}
|
|
870
|
+
// Worker, second pass: UI-quality retry loop, failure logging and gateway
// lifecycle messages. Pairs are [pattern, failure message], checked in order.
for (const [pattern, failureMessage] of [
  [
    /UI_QUALITY_NUDGE_LIMIT/i,
    'visual UI quality retries should have a bounded loop separate from generic task nudges',
  ],
  [
    /complete requested page-owned file\(s\)/i,
    'repeated UI quality retries should push complete scoped-file replacement instead of micro-edits',
  ],
  [
    /Task \$\{taskId\} failed: \$\{errorMessage\}/i,
    'task failures should log the actual error message instead of serializing Error objects as {}',
  ],
  [
    /Reusing existing healthy OpenClaw Gateway/i,
    'worker startup should reuse an existing healthy gateway instead of interrupting running agents',
  ],
  [
    /Existing OpenClaw agent process detected; not killing gateway/i,
    'gateway cleanup should not kill the gateway while openclaw agent processes are still running',
  ],
]) {
  assert.match(workerSource, pattern, failureMessage);
}
|
|
895
|
+
// CLI entry point (bin/agentforge.js): log serialization and status output.
// Pairs are [pattern, failure message], checked in order.
for (const [pattern, failureMessage] of [
  [
    /serializeLogArg[\s\S]*arg instanceof Error[\s\S]*arg\.stack/i,
    'worker log tee should serialize Error objects with stack/message details',
  ],
  [
    /Worker:\s+.*Running[\s\S]*Connected worker process is running/i,
    'agentforge status should report an already-running worker instead of only telling the user to start it',
  ],
  [
    /SUPERVISOR_PID_FILE[\s\S]*WORKER_PID_FILE[\s\S]*getRuntimeStatus/i,
    'CLI health/status output should inspect the background supervisor and worker PID files',
  ],
]) {
  assert.match(workerBinSource, pattern, failureMessage);
}
|
|
910
|
+
|
|
911
|
+
// Success summary: this line is reached only if no assertion earlier in the
// script threw. The number in parentheses is the length of the `cases` table
// declared near the top of the file.
console.log(`task semantics checks passed (${cases.length}); UI quality fixture and visual gate checks passed`);
|