@hamp10/agentforge 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/agentforge.js CHANGED
@@ -274,9 +274,9 @@ program
274
274
  const worker = new AgentForgeWorker(config.token, wsUrl, config);
275
275
 
276
276
  // Graceful shutdown
277
- process.on('SIGINT', () => { console.log('\n[SIGINT received]'); worker.shutdown(); });
278
- process.on('SIGTERM', () => { console.log('\n[SIGTERM received]'); worker.shutdown(); });
279
- process.on('SIGHUP', () => { console.log('\n[SIGHUP received — terminal closed]'); worker.shutdown(); });
277
+ process.on('SIGINT', () => { console.log('\n[SIGINT received — stopping]'); worker.shutdown(0); }); // Ctrl+C: clean stop
278
+ process.on('SIGTERM', () => { console.log('\n[SIGTERM received — restarting]'); worker.shutdown(1); }); // kill: supervisor restarts
279
+ process.on('SIGHUP', () => { console.log('\n[SIGHUP received — restarting]'); worker.shutdown(1); }); // terminal close: supervisor restarts
280
280
 
281
281
  try {
282
282
  await worker.initialize();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hamp10/agentforge",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "AgentForge worker — connect your machine to agentforge.ai",
5
5
  "type": "module",
6
6
  "bin": {
@@ -79,6 +79,28 @@ const TOOLS = [
79
79
  required: ['url']
80
80
  }
81
81
  }
82
+ },
83
+ {
84
+ type: 'function',
85
+ function: {
86
+ name: 'take_screenshot',
87
+ description: 'Take a screenshot of the current screen or the agent browser (port 9223). Returns base64 image data you can analyze visually. Use this to check what a webpage looks like, verify a build result, or monitor a running process.',
88
+ parameters: {
89
+ type: 'object',
90
+ properties: {
91
+ target: {
92
+ type: 'string',
93
+ enum: ['screen', 'browser'],
94
+ description: 'screen = full screen capture. browser = screenshot of the agent browser (port 9223).'
95
+ },
96
+ url: {
97
+ type: 'string',
98
+ description: 'Optional: navigate the browser to this URL before taking the screenshot.'
99
+ }
100
+ },
101
+ required: ['target']
102
+ }
103
+ }
82
104
  }
83
105
  ];
84
106
 
@@ -146,25 +168,40 @@ export class OllamaAgent extends EventEmitter {
146
168
  console.log(` Task: ${task}`);
147
169
  console.log(` Working dir: ${workDir}`);
148
170
 
171
+ // Detect model capabilities
172
+ const isQwen3 = this.model.startsWith('qwen3');
173
+ const isVision = /vl|vision|llava|minicpm-v|moondream/i.test(this.model);
174
+
149
175
  try {
150
176
  // Load conversation history from disk (session persistence)
151
177
  const history = this._loadHistory(agentId, workDir, sessionId);
152
178
 
179
+ const systemPrompt = [
180
+ `You are an AI agent running on AgentForge.ai.`,
181
+ `Your working directory is: ${workDir}`,
182
+ ``,
183
+ `CRITICAL RULES — follow these exactly:`,
184
+ `1. Use the provided tools to complete the task. Do NOT write Python code, pseudo-code, or code blocks to simulate tool calls.`,
185
+ `2. To run a command, call the "bash" tool. To read a file, call "read_file". To write, call "write_file". To take a screenshot, call "take_screenshot".`,
186
+ `3. Every action must be a real tool call — not described in text, not shown as code.`,
187
+ `4. When you take a screenshot, you will receive the actual image back and can see it.`,
188
+ `5. When you are done, write a clear summary of what you accomplished.`,
189
+ `6. Do not ask for clarification — make your best judgment and act.`,
190
+ ].join('\n');
191
+
153
192
  const messages = [
154
- {
155
- role: 'system',
156
- content: [
157
- `You are an AI agent running on AgentForge.ai.`,
158
- `Your working directory is: ${workDir}`,
159
- `Use the available tools to complete the task autonomously.`,
160
- `When you are done, write a clear summary of what you accomplished.`,
161
- `Do not ask for clarification — make your best judgment and act.`
162
- ].join('\n')
163
- },
193
+ { role: 'system', content: systemPrompt },
164
194
  ...history,
165
- { role: 'user', content: task }
166
195
  ];
167
196
 
197
+ // Attach initial image to user message if provided
198
+ const userMessage = { role: 'user', content: task };
199
+ if (image && isVision) {
200
+ const base64 = image.replace(/^data:image\/\w+;base64,/, '');
201
+ userMessage.images = [base64];
202
+ }
203
+ messages.push(userMessage);
204
+
168
205
  let finalContent = '';
169
206
  const MAX_TURNS = 25;
170
207
 
@@ -175,18 +212,25 @@ export class OllamaAgent extends EventEmitter {
175
212
 
176
213
  let response;
177
214
  try {
178
- // OpenAI-compatible endpoint — works with Ollama, LM Studio, Jan, llama.cpp, vLLM, etc.
215
+ const requestBody = {
216
+ model: this.model,
217
+ messages,
218
+ tools: TOOLS,
219
+ tool_choice: 'auto',
220
+ stream: true,
221
+ };
222
+
223
+ // Disable thinking mode for qwen3 — prevents 3-minute silent think phases
224
+ // and makes tool-call JSON output reliable.
225
+ if (isQwen3) {
226
+ requestBody.options = { think: false };
227
+ }
228
+
179
229
  response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
180
230
  method: 'POST',
181
231
  headers: { 'Content-Type': 'application/json' },
182
232
  signal: controller.signal,
183
- body: JSON.stringify({
184
- model: this.model,
185
- messages,
186
- tools: TOOLS,
187
- tool_choice: 'auto',
188
- stream: true
189
- })
233
+ body: JSON.stringify(requestBody)
190
234
  });
191
235
  } catch (fetchErr) {
192
236
  if (fetchErr.name === 'AbortError') break;
@@ -321,7 +365,28 @@ export class OllamaAgent extends EventEmitter {
321
365
  description: `✓ ${name}`
322
366
  });
323
367
 
324
- messages.push({ role: 'tool', content: String(result) });
368
+ // If the tool returned an image (base64), push it as a vision message
369
+ // so the model can actually see what was captured.
370
+ const isImageResult = typeof result === 'string' && result.startsWith('data:image/');
371
+ if (isImageResult && isVision) {
372
+ messages.push({
373
+ role: 'tool',
374
+ tool_call_id: toolCall.id || undefined,
375
+ content: '[Screenshot captured — see image attached]'
376
+ });
377
+ const base64 = result.replace(/^data:image\/\w+;base64,/, '');
378
+ messages.push({
379
+ role: 'user',
380
+ content: 'Here is the screenshot:',
381
+ images: [base64]
382
+ });
383
+ } else {
384
+ messages.push({
385
+ role: 'tool',
386
+ tool_call_id: toolCall.id || undefined,
387
+ content: isImageResult ? '[Screenshot captured — install a vision model to analyze images]' : String(result)
388
+ });
389
+ }
325
390
  }
326
391
  // Loop back — model will respond to the tool results
327
392
  continue;
@@ -434,6 +499,22 @@ export class OllamaAgent extends EventEmitter {
434
499
  return text.slice(0, 4000) + (text.length > 4000 ? '\n...(truncated)' : '');
435
500
  }
436
501
 
502
+ case 'take_screenshot': {
503
+ const target = args.target || 'screen';
504
+ const tmpFile = `/tmp/af_screenshot_${Date.now()}.png`;
505
+
506
+ if (target === 'browser') {
507
+ // Navigate + screenshot via CDP on agent browser (port 9223)
508
+ return await this._cdpScreenshot(args.url, tmpFile);
509
+ } else {
510
+ // Full screen capture
511
+ await execAsync(`screencapture -x "${tmpFile}"`);
512
+ const data = readFileSync(tmpFile).toString('base64');
513
+ try { await execAsync(`rm -f "${tmpFile}"`); } catch {}
514
+ return `data:image/png;base64,${data}`;
515
+ }
516
+ }
517
+
437
518
  default:
438
519
  return `Unknown tool: ${name}`;
439
520
  }
@@ -442,6 +523,69 @@ export class OllamaAgent extends EventEmitter {
442
523
  }
443
524
  }
444
525
 
526
+ // ─── CDP browser screenshot ───────────────────────────────────────────────
527
+
528
+ async _cdpScreenshot(navigateUrl, tmpFile) {
529
+ const CDP_PORT = 9223;
530
+ let tabId;
531
+
532
+ // Get or create a tab
533
+ const tabsRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json`);
534
+ const tabs = await tabsRes.json();
535
+ const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
536
+
537
+ if (!usable) {
538
+ // Create new tab
539
+ const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
540
+ const newTabData = await newTab.json();
541
+ tabId = newTabData.id;
542
+ } else {
543
+ tabId = usable.id;
544
+ }
545
+
546
+ return new Promise((resolve, reject) => {
547
+ // Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
548
+ const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
549
+ let msgId = 1;
550
+ const pending = new Map();
551
+
552
+ const send = (method, params = {}) => new Promise((res, rej) => {
553
+ const id = msgId++;
554
+ pending.set(id, { resolve: res, reject: rej });
555
+ ws.send(JSON.stringify({ id, method, params }));
556
+ });
557
+
558
+ ws.addEventListener('message', (evt) => {
559
+ const msg = JSON.parse(evt.data);
560
+ if (msg.id && pending.has(msg.id)) {
561
+ const { resolve: res, reject: rej } = pending.get(msg.id);
562
+ pending.delete(msg.id);
563
+ if (msg.error) rej(new Error(msg.error.message));
564
+ else res(msg.result);
565
+ }
566
+ });
567
+
568
+ ws.addEventListener('open', async () => {
569
+ try {
570
+ if (navigateUrl) {
571
+ await send('Page.navigate', { url: navigateUrl });
572
+ // Wait for load
573
+ await new Promise(r => setTimeout(r, 3000));
574
+ }
575
+ const { data } = await send('Page.captureScreenshot', { format: 'png' });
576
+ ws.close();
577
+ resolve(`data:image/png;base64,${data}`);
578
+ } catch (err) {
579
+ ws.close();
580
+ reject(err);
581
+ }
582
+ });
583
+
584
+ ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
585
+ setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 20000);
586
+ });
587
+ }
588
+
445
589
  _resolvePath(p, workDir) {
446
590
  return path.isAbsolute(p) ? p : path.join(workDir, p);
447
591
  }
@@ -459,6 +603,8 @@ export class OllamaAgent extends EventEmitter {
459
603
  case 'web_fetch': {
460
604
  try { return `Fetching ${new URL(args.url).hostname}`; } catch { return 'Fetching URL'; }
461
605
  }
606
+ case 'take_screenshot':
607
+ return `Screenshot: ${args.url || args.target}`;
462
608
  default:
463
609
  return name;
464
610
  }
package/src/supervisor.js CHANGED
@@ -39,11 +39,18 @@ function removePid(file) {
39
39
  export async function runSupervisor(innerArgv) {
40
40
  writePid(PID_FILE, process.pid);
41
41
 
42
+ // SIGTERM on supervisor = intentional stop (from agentforge stop command)
42
43
  process.on('SIGTERM', () => {
43
44
  console.log('[supervisor] Received SIGTERM — shutting down');
44
45
  removePid(PID_FILE);
45
46
  process.exit(0);
46
47
  });
48
+ // SIGINT = Ctrl+C in foreground terminal = intentional stop
49
+ process.on('SIGINT', () => {
50
+ console.log('[supervisor] Received SIGINT — shutting down');
51
+ removePid(PID_FILE);
52
+ process.exit(0);
53
+ });
47
54
 
48
55
  let consecutiveCrashes = 0;
49
56
 
package/src/worker.js CHANGED
@@ -1756,12 +1756,12 @@ Review and add specific steps, pitfalls, and patterns that helped succeed.
1756
1756
  });
1757
1757
  }
1758
1758
 
1759
- async shutdown() {
1759
+ async shutdown(code = 1) {
1760
1760
  console.log('🛑 Shutting down worker...');
1761
1761
  if (this.ws) {
1762
1762
  this.ws.close();
1763
1763
  }
1764
- process.exit(0);
1764
+ process.exit(code);
1765
1765
  }
1766
1766
 
1767
1767
  // Find the AgentForge git repo root, regardless of whether worker is globally installed or run from source