@hamp10/agentforge 0.2.1 → 0.2.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/bin/agentforge.js +3 -3
- package/package.json +1 -1
- package/src/OllamaAgent.js +174 -20
- package/src/supervisor.js +7 -0
- package/src/worker.js +2 -2
package/bin/agentforge.js
CHANGED
|
@@ -274,9 +274,9 @@ program
|
|
|
274
274
|
const worker = new AgentForgeWorker(config.token, wsUrl, config);
|
|
275
275
|
|
|
276
276
|
// Graceful shutdown
|
|
277
|
-
process.on('SIGINT',
|
|
278
|
-
process.on('SIGTERM', () => { console.log('\n[SIGTERM received]'); worker.shutdown(); });
|
|
279
|
-
process.on('SIGHUP',
|
|
277
|
+
process.on('SIGINT', () => { console.log('\n[SIGINT received — stopping]'); worker.shutdown(0); }); // Ctrl+C: clean stop
|
|
278
|
+
process.on('SIGTERM', () => { console.log('\n[SIGTERM received — restarting]'); worker.shutdown(1); }); // kill: supervisor restarts
|
|
279
|
+
process.on('SIGHUP', () => { console.log('\n[SIGHUP received — restarting]'); worker.shutdown(1); }); // terminal close: supervisor restarts
|
|
280
280
|
|
|
281
281
|
try {
|
|
282
282
|
await worker.initialize();
|
package/package.json
CHANGED
package/src/OllamaAgent.js
CHANGED
|
@@ -79,6 +79,28 @@ const TOOLS = [
|
|
|
79
79
|
required: ['url']
|
|
80
80
|
}
|
|
81
81
|
}
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
type: 'function',
|
|
85
|
+
function: {
|
|
86
|
+
name: 'take_screenshot',
|
|
87
|
+
description: 'Take a screenshot of the current screen or the agent browser (port 9223). Returns base64 image data you can analyze visually. Use this to check what a webpage looks like, verify a build result, or monitor a running process.',
|
|
88
|
+
parameters: {
|
|
89
|
+
type: 'object',
|
|
90
|
+
properties: {
|
|
91
|
+
target: {
|
|
92
|
+
type: 'string',
|
|
93
|
+
enum: ['screen', 'browser'],
|
|
94
|
+
description: 'screen = full screen capture. browser = screenshot of the agent browser (port 9223).'
|
|
95
|
+
},
|
|
96
|
+
url: {
|
|
97
|
+
type: 'string',
|
|
98
|
+
description: 'Optional: navigate the browser to this URL before taking the screenshot.'
|
|
99
|
+
}
|
|
100
|
+
},
|
|
101
|
+
required: ['target']
|
|
102
|
+
}
|
|
103
|
+
}
|
|
82
104
|
}
|
|
83
105
|
];
|
|
84
106
|
|
|
@@ -146,26 +168,42 @@ export class OllamaAgent extends EventEmitter {
|
|
|
146
168
|
console.log(` Task: ${task}`);
|
|
147
169
|
console.log(` Working dir: ${workDir}`);
|
|
148
170
|
|
|
171
|
+
// Detect model capabilities
|
|
172
|
+
const isQwen3 = this.model.startsWith('qwen3');
|
|
173
|
+
const isVision = /vl|vision|llava|minicpm-v|moondream/i.test(this.model);
|
|
174
|
+
|
|
149
175
|
try {
|
|
150
176
|
// Load conversation history from disk (session persistence)
|
|
151
177
|
const history = this._loadHistory(agentId, workDir, sessionId);
|
|
152
178
|
|
|
179
|
+
const systemPrompt = [
|
|
180
|
+
`You are an AI agent running on AgentForge.ai.`,
|
|
181
|
+
`Your working directory is: ${workDir}`,
|
|
182
|
+
``,
|
|
183
|
+
`CRITICAL RULES — follow these exactly:`,
|
|
184
|
+
`1. Use the provided tools to complete the task. Do NOT write Python code, pseudo-code, or code blocks to simulate tool calls.`,
|
|
185
|
+
`2. To run a command, call the "bash" tool. To read a file, call "read_file". To write, call "write_file". To take a screenshot, call "take_screenshot".`,
|
|
186
|
+
`3. Every action must be a real tool call — not described in text, not shown as code.`,
|
|
187
|
+
`4. When you take a screenshot, you will receive the actual image back and can see it.`,
|
|
188
|
+
`5. When you are done, write a clear summary of what you accomplished.`,
|
|
189
|
+
`6. Do not ask for clarification — make your best judgment and act.`,
|
|
190
|
+
].join('\n');
|
|
191
|
+
|
|
153
192
|
const messages = [
|
|
154
|
-
{
|
|
155
|
-
role: 'system',
|
|
156
|
-
content: [
|
|
157
|
-
`You are an AI agent running on AgentForge.ai.`,
|
|
158
|
-
`Your working directory is: ${workDir}`,
|
|
159
|
-
`Use the available tools to complete the task autonomously.`,
|
|
160
|
-
`When you are done, write a clear summary of what you accomplished.`,
|
|
161
|
-
`Do not ask for clarification — make your best judgment and act.`
|
|
162
|
-
].join('\n')
|
|
163
|
-
},
|
|
193
|
+
{ role: 'system', content: systemPrompt },
|
|
164
194
|
...history,
|
|
165
|
-
{ role: 'user', content: task }
|
|
166
195
|
];
|
|
167
196
|
|
|
197
|
+
// Attach initial image to user message if provided
|
|
198
|
+
const userMessage = { role: 'user', content: task };
|
|
199
|
+
if (image && isVision) {
|
|
200
|
+
const base64 = image.replace(/^data:image\/\w+;base64,/, '');
|
|
201
|
+
userMessage.images = [base64];
|
|
202
|
+
}
|
|
203
|
+
messages.push(userMessage);
|
|
204
|
+
|
|
168
205
|
let finalContent = '';
|
|
206
|
+
let allOutput = ''; // accumulate everything streamed across all turns
|
|
169
207
|
const MAX_TURNS = 25;
|
|
170
208
|
|
|
171
209
|
for (let turn = 0; turn < MAX_TURNS; turn++) {
|
|
@@ -175,18 +213,25 @@ export class OllamaAgent extends EventEmitter {
|
|
|
175
213
|
|
|
176
214
|
let response;
|
|
177
215
|
try {
|
|
178
|
-
|
|
216
|
+
const requestBody = {
|
|
217
|
+
model: this.model,
|
|
218
|
+
messages,
|
|
219
|
+
tools: TOOLS,
|
|
220
|
+
tool_choice: 'auto',
|
|
221
|
+
stream: true,
|
|
222
|
+
};
|
|
223
|
+
|
|
224
|
+
// Disable thinking mode for qwen3 — prevents 3-minute silent think phases
|
|
225
|
+
// and makes tool-call JSON output reliable.
|
|
226
|
+
if (isQwen3) {
|
|
227
|
+
requestBody.options = { think: false };
|
|
228
|
+
}
|
|
229
|
+
|
|
179
230
|
response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
|
|
180
231
|
method: 'POST',
|
|
181
232
|
headers: { 'Content-Type': 'application/json' },
|
|
182
233
|
signal: controller.signal,
|
|
183
|
-
body: JSON.stringify({
|
|
184
|
-
model: this.model,
|
|
185
|
-
messages,
|
|
186
|
-
tools: TOOLS,
|
|
187
|
-
tool_choice: 'auto',
|
|
188
|
-
stream: true
|
|
189
|
-
})
|
|
234
|
+
body: JSON.stringify(requestBody)
|
|
190
235
|
});
|
|
191
236
|
} catch (fetchErr) {
|
|
192
237
|
if (fetchErr.name === 'AbortError') break;
|
|
@@ -272,6 +317,7 @@ export class OllamaAgent extends EventEmitter {
|
|
|
272
317
|
thinkBuffer = inThinkBlock ? thinkBuffer.slice(thinkBuffer.lastIndexOf('<think>')) : '';
|
|
273
318
|
|
|
274
319
|
streamContent += out;
|
|
320
|
+
allOutput += out;
|
|
275
321
|
if (out) {
|
|
276
322
|
this.emit('agent_output', { agentId, output: out });
|
|
277
323
|
}
|
|
@@ -321,7 +367,28 @@ export class OllamaAgent extends EventEmitter {
|
|
|
321
367
|
description: `✓ ${name}`
|
|
322
368
|
});
|
|
323
369
|
|
|
324
|
-
|
|
370
|
+
// If the tool returned an image (base64), push it as a vision message
|
|
371
|
+
// so the model can actually see what was captured.
|
|
372
|
+
const isImageResult = typeof result === 'string' && result.startsWith('data:image/');
|
|
373
|
+
if (isImageResult && isVision) {
|
|
374
|
+
messages.push({
|
|
375
|
+
role: 'tool',
|
|
376
|
+
tool_call_id: toolCall.id || undefined,
|
|
377
|
+
content: '[Screenshot captured — see image attached]'
|
|
378
|
+
});
|
|
379
|
+
const base64 = result.replace(/^data:image\/\w+;base64,/, '');
|
|
380
|
+
messages.push({
|
|
381
|
+
role: 'user',
|
|
382
|
+
content: 'Here is the screenshot:',
|
|
383
|
+
images: [base64]
|
|
384
|
+
});
|
|
385
|
+
} else {
|
|
386
|
+
messages.push({
|
|
387
|
+
role: 'tool',
|
|
388
|
+
tool_call_id: toolCall.id || undefined,
|
|
389
|
+
content: isImageResult ? '[Screenshot captured — install a vision model to analyze images]' : String(result)
|
|
390
|
+
});
|
|
391
|
+
}
|
|
325
392
|
}
|
|
326
393
|
// Loop back — model will respond to the tool results
|
|
327
394
|
continue;
|
|
@@ -332,6 +399,12 @@ export class OllamaAgent extends EventEmitter {
|
|
|
332
399
|
finalContent = streamContent;
|
|
333
400
|
}
|
|
334
401
|
break;
|
|
402
|
+
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
// Use all accumulated output if final turn had no content (agent ended after tool calls)
|
|
406
|
+
if (!finalContent && allOutput) {
|
|
407
|
+
finalContent = allOutput;
|
|
335
408
|
}
|
|
336
409
|
|
|
337
410
|
// Persist history for next task
|
|
@@ -434,6 +507,22 @@ export class OllamaAgent extends EventEmitter {
|
|
|
434
507
|
return text.slice(0, 4000) + (text.length > 4000 ? '\n...(truncated)' : '');
|
|
435
508
|
}
|
|
436
509
|
|
|
510
|
+
case 'take_screenshot': {
|
|
511
|
+
const target = args.target || 'screen';
|
|
512
|
+
const tmpFile = `/tmp/af_screenshot_${Date.now()}.png`;
|
|
513
|
+
|
|
514
|
+
if (target === 'browser') {
|
|
515
|
+
// Navigate + screenshot via CDP on agent browser (port 9223)
|
|
516
|
+
return await this._cdpScreenshot(args.url, tmpFile);
|
|
517
|
+
} else {
|
|
518
|
+
// Full screen capture
|
|
519
|
+
await execAsync(`screencapture -x "${tmpFile}"`);
|
|
520
|
+
const data = readFileSync(tmpFile).toString('base64');
|
|
521
|
+
try { await execAsync(`rm -f "${tmpFile}"`); } catch {}
|
|
522
|
+
return `data:image/png;base64,${data}`;
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
|
|
437
526
|
default:
|
|
438
527
|
return `Unknown tool: ${name}`;
|
|
439
528
|
}
|
|
@@ -442,6 +531,69 @@ export class OllamaAgent extends EventEmitter {
|
|
|
442
531
|
}
|
|
443
532
|
}
|
|
444
533
|
|
|
534
|
+
// ─── CDP browser screenshot ───────────────────────────────────────────────
|
|
535
|
+
|
|
536
|
+
async _cdpScreenshot(navigateUrl, tmpFile) {
|
|
537
|
+
const CDP_PORT = 9223;
|
|
538
|
+
let tabId;
|
|
539
|
+
|
|
540
|
+
// Get or create a tab
|
|
541
|
+
const tabsRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json`);
|
|
542
|
+
const tabs = await tabsRes.json();
|
|
543
|
+
const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
|
|
544
|
+
|
|
545
|
+
if (!usable) {
|
|
546
|
+
// Create new tab
|
|
547
|
+
const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
|
|
548
|
+
const newTabData = await newTab.json();
|
|
549
|
+
tabId = newTabData.id;
|
|
550
|
+
} else {
|
|
551
|
+
tabId = usable.id;
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
return new Promise((resolve, reject) => {
|
|
555
|
+
// Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
|
|
556
|
+
const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
|
|
557
|
+
let msgId = 1;
|
|
558
|
+
const pending = new Map();
|
|
559
|
+
|
|
560
|
+
const send = (method, params = {}) => new Promise((res, rej) => {
|
|
561
|
+
const id = msgId++;
|
|
562
|
+
pending.set(id, { resolve: res, reject: rej });
|
|
563
|
+
ws.send(JSON.stringify({ id, method, params }));
|
|
564
|
+
});
|
|
565
|
+
|
|
566
|
+
ws.addEventListener('message', (evt) => {
|
|
567
|
+
const msg = JSON.parse(evt.data);
|
|
568
|
+
if (msg.id && pending.has(msg.id)) {
|
|
569
|
+
const { resolve: res, reject: rej } = pending.get(msg.id);
|
|
570
|
+
pending.delete(msg.id);
|
|
571
|
+
if (msg.error) rej(new Error(msg.error.message));
|
|
572
|
+
else res(msg.result);
|
|
573
|
+
}
|
|
574
|
+
});
|
|
575
|
+
|
|
576
|
+
ws.addEventListener('open', async () => {
|
|
577
|
+
try {
|
|
578
|
+
if (navigateUrl) {
|
|
579
|
+
await send('Page.navigate', { url: navigateUrl });
|
|
580
|
+
// Wait for load
|
|
581
|
+
await new Promise(r => setTimeout(r, 3000));
|
|
582
|
+
}
|
|
583
|
+
const { data } = await send('Page.captureScreenshot', { format: 'png' });
|
|
584
|
+
ws.close();
|
|
585
|
+
resolve(`data:image/png;base64,${data}`);
|
|
586
|
+
} catch (err) {
|
|
587
|
+
ws.close();
|
|
588
|
+
reject(err);
|
|
589
|
+
}
|
|
590
|
+
});
|
|
591
|
+
|
|
592
|
+
ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
|
|
593
|
+
setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 20000);
|
|
594
|
+
});
|
|
595
|
+
}
|
|
596
|
+
|
|
445
597
|
_resolvePath(p, workDir) {
|
|
446
598
|
return path.isAbsolute(p) ? p : path.join(workDir, p);
|
|
447
599
|
}
|
|
@@ -459,6 +611,8 @@ export class OllamaAgent extends EventEmitter {
|
|
|
459
611
|
case 'web_fetch': {
|
|
460
612
|
try { return `Fetching ${new URL(args.url).hostname}`; } catch { return 'Fetching URL'; }
|
|
461
613
|
}
|
|
614
|
+
case 'take_screenshot':
|
|
615
|
+
return `Screenshot: ${args.url || args.target}`;
|
|
462
616
|
default:
|
|
463
617
|
return name;
|
|
464
618
|
}
|
package/src/supervisor.js
CHANGED
|
@@ -39,11 +39,18 @@ function removePid(file) {
|
|
|
39
39
|
export async function runSupervisor(innerArgv) {
|
|
40
40
|
writePid(PID_FILE, process.pid);
|
|
41
41
|
|
|
42
|
+
// SIGTERM on supervisor = intentional stop (from agentforge stop command)
|
|
42
43
|
process.on('SIGTERM', () => {
|
|
43
44
|
console.log('[supervisor] Received SIGTERM — shutting down');
|
|
44
45
|
removePid(PID_FILE);
|
|
45
46
|
process.exit(0);
|
|
46
47
|
});
|
|
48
|
+
// SIGINT = Ctrl+C in foreground terminal = intentional stop
|
|
49
|
+
process.on('SIGINT', () => {
|
|
50
|
+
console.log('[supervisor] Received SIGINT — shutting down');
|
|
51
|
+
removePid(PID_FILE);
|
|
52
|
+
process.exit(0);
|
|
53
|
+
});
|
|
47
54
|
|
|
48
55
|
let consecutiveCrashes = 0;
|
|
49
56
|
|
package/src/worker.js
CHANGED
|
@@ -1756,12 +1756,12 @@ Review and add specific steps, pitfalls, and patterns that helped succeed.
|
|
|
1756
1756
|
});
|
|
1757
1757
|
}
|
|
1758
1758
|
|
|
1759
|
-
async shutdown() {
|
|
1759
|
+
async shutdown(code = 1) {
|
|
1760
1760
|
console.log('🛑 Shutting down worker...');
|
|
1761
1761
|
if (this.ws) {
|
|
1762
1762
|
this.ws.close();
|
|
1763
1763
|
}
|
|
1764
|
-
process.exit(
|
|
1764
|
+
process.exit(code);
|
|
1765
1765
|
}
|
|
1766
1766
|
|
|
1767
1767
|
// Find the AgentForge git repo root, regardless of whether worker is globally installed or run from source
|