@hamp10/agentforge 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/agentforge.js CHANGED
@@ -274,9 +274,9 @@ program
274
274
  const worker = new AgentForgeWorker(config.token, wsUrl, config);
275
275
 
276
276
  // Graceful shutdown
277
- process.on('SIGINT', () => { console.log('\n[SIGINT received]'); worker.shutdown(); });
278
- process.on('SIGTERM', () => { console.log('\n[SIGTERM received]'); worker.shutdown(); });
279
- process.on('SIGHUP', () => { console.log('\n[SIGHUP received — terminal closed]'); worker.shutdown(); });
277
+ process.on('SIGINT', () => { console.log('\n[SIGINT received — stopping]'); worker.shutdown(0); }); // Ctrl+C: clean stop
278
+ process.on('SIGTERM', () => { console.log('\n[SIGTERM received — restarting]'); worker.shutdown(1); }); // kill: supervisor restarts
279
+ process.on('SIGHUP', () => { console.log('\n[SIGHUP received — restarting]'); worker.shutdown(1); }); // terminal close: supervisor restarts
280
280
 
281
281
  try {
282
282
  await worker.initialize();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hamp10/agentforge",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "description": "AgentForge worker — connect your machine to agentforge.ai",
5
5
  "type": "module",
6
6
  "bin": {
@@ -79,6 +79,28 @@ const TOOLS = [
79
79
  required: ['url']
80
80
  }
81
81
  }
82
+ },
83
+ {
84
+ type: 'function',
85
+ function: {
86
+ name: 'take_screenshot',
87
+ description: 'Take a screenshot of the current screen or the agent browser (port 9223). Returns base64 image data you can analyze visually. Use this to check what a webpage looks like, verify a build result, or monitor a running process.',
88
+ parameters: {
89
+ type: 'object',
90
+ properties: {
91
+ target: {
92
+ type: 'string',
93
+ enum: ['screen', 'browser'],
94
+ description: 'screen = full screen capture. browser = screenshot of the agent browser (port 9223).'
95
+ },
96
+ url: {
97
+ type: 'string',
98
+ description: 'Optional: navigate the browser to this URL before taking the screenshot.'
99
+ }
100
+ },
101
+ required: ['target']
102
+ }
103
+ }
82
104
  }
83
105
  ];
84
106
 
@@ -146,26 +168,42 @@ export class OllamaAgent extends EventEmitter {
146
168
  console.log(` Task: ${task}`);
147
169
  console.log(` Working dir: ${workDir}`);
148
170
 
171
+ // Detect model capabilities
172
+ const isQwen3 = this.model.startsWith('qwen3');
173
+ const isVision = /vl|vision|llava|minicpm-v|moondream/i.test(this.model);
174
+
149
175
  try {
150
176
  // Load conversation history from disk (session persistence)
151
177
  const history = this._loadHistory(agentId, workDir, sessionId);
152
178
 
179
+ const systemPrompt = [
180
+ `You are an AI agent running on AgentForge.ai.`,
181
+ `Your working directory is: ${workDir}`,
182
+ ``,
183
+ `CRITICAL RULES — follow these exactly:`,
184
+ `1. Use the provided tools to complete the task. Do NOT write Python code, pseudo-code, or code blocks to simulate tool calls.`,
185
+ `2. To run a command, call the "bash" tool. To read a file, call "read_file". To write, call "write_file". To take a screenshot, call "take_screenshot".`,
186
+ `3. Every action must be a real tool call — not described in text, not shown as code.`,
187
+ `4. When you take a screenshot, you will receive the actual image back and can see it.`,
188
+ `5. When you are done, write a clear summary of what you accomplished.`,
189
+ `6. Do not ask for clarification — make your best judgment and act.`,
190
+ ].join('\n');
191
+
153
192
  const messages = [
154
- {
155
- role: 'system',
156
- content: [
157
- `You are an AI agent running on AgentForge.ai.`,
158
- `Your working directory is: ${workDir}`,
159
- `Use the available tools to complete the task autonomously.`,
160
- `When you are done, write a clear summary of what you accomplished.`,
161
- `Do not ask for clarification — make your best judgment and act.`
162
- ].join('\n')
163
- },
193
+ { role: 'system', content: systemPrompt },
164
194
  ...history,
165
- { role: 'user', content: task }
166
195
  ];
167
196
 
197
+ // Attach initial image to user message if provided
198
+ const userMessage = { role: 'user', content: task };
199
+ if (image && isVision) {
200
+ const base64 = image.replace(/^data:image\/\w+;base64,/, '');
201
+ userMessage.images = [base64];
202
+ }
203
+ messages.push(userMessage);
204
+
168
205
  let finalContent = '';
206
+ let allOutput = ''; // accumulate everything streamed across all turns
169
207
  const MAX_TURNS = 25;
170
208
 
171
209
  for (let turn = 0; turn < MAX_TURNS; turn++) {
@@ -175,18 +213,25 @@ export class OllamaAgent extends EventEmitter {
175
213
 
176
214
  let response;
177
215
  try {
178
- // OpenAI-compatible endpoint — works with Ollama, LM Studio, Jan, llama.cpp, vLLM, etc.
216
+ const requestBody = {
217
+ model: this.model,
218
+ messages,
219
+ tools: TOOLS,
220
+ tool_choice: 'auto',
221
+ stream: true,
222
+ };
223
+
224
+ // Disable thinking mode for qwen3 — prevents 3-minute silent think phases
225
+ // and makes tool-call JSON output reliable.
226
+ if (isQwen3) {
227
+ requestBody.options = { think: false };
228
+ }
229
+
179
230
  response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
180
231
  method: 'POST',
181
232
  headers: { 'Content-Type': 'application/json' },
182
233
  signal: controller.signal,
183
- body: JSON.stringify({
184
- model: this.model,
185
- messages,
186
- tools: TOOLS,
187
- tool_choice: 'auto',
188
- stream: true
189
- })
234
+ body: JSON.stringify(requestBody)
190
235
  });
191
236
  } catch (fetchErr) {
192
237
  if (fetchErr.name === 'AbortError') break;
@@ -272,6 +317,7 @@ export class OllamaAgent extends EventEmitter {
272
317
  thinkBuffer = inThinkBlock ? thinkBuffer.slice(thinkBuffer.lastIndexOf('<think>')) : '';
273
318
 
274
319
  streamContent += out;
320
+ allOutput += out;
275
321
  if (out) {
276
322
  this.emit('agent_output', { agentId, output: out });
277
323
  }
@@ -321,7 +367,28 @@ export class OllamaAgent extends EventEmitter {
321
367
  description: `✓ ${name}`
322
368
  });
323
369
 
324
- messages.push({ role: 'tool', content: String(result) });
370
+ // If the tool returned an image (base64), push it as a vision message
371
+ // so the model can actually see what was captured.
372
+ const isImageResult = typeof result === 'string' && result.startsWith('data:image/');
373
+ if (isImageResult && isVision) {
374
+ messages.push({
375
+ role: 'tool',
376
+ tool_call_id: toolCall.id || undefined,
377
+ content: '[Screenshot captured — see image attached]'
378
+ });
379
+ const base64 = result.replace(/^data:image\/\w+;base64,/, '');
380
+ messages.push({
381
+ role: 'user',
382
+ content: 'Here is the screenshot:',
383
+ images: [base64]
384
+ });
385
+ } else {
386
+ messages.push({
387
+ role: 'tool',
388
+ tool_call_id: toolCall.id || undefined,
389
+ content: isImageResult ? '[Screenshot captured — install a vision model to analyze images]' : String(result)
390
+ });
391
+ }
325
392
  }
326
393
  // Loop back — model will respond to the tool results
327
394
  continue;
@@ -332,6 +399,12 @@ export class OllamaAgent extends EventEmitter {
332
399
  finalContent = streamContent;
333
400
  }
334
401
  break;
402
+
403
+ }
404
+
405
+ // Use all accumulated output if final turn had no content (agent ended after tool calls)
406
+ if (!finalContent && allOutput) {
407
+ finalContent = allOutput;
335
408
  }
336
409
 
337
410
  // Persist history for next task
@@ -434,6 +507,22 @@ export class OllamaAgent extends EventEmitter {
434
507
  return text.slice(0, 4000) + (text.length > 4000 ? '\n...(truncated)' : '');
435
508
  }
436
509
 
510
+ case 'take_screenshot': {
511
+ const target = args.target || 'screen';
512
+ const tmpFile = `/tmp/af_screenshot_${Date.now()}.png`;
513
+
514
+ if (target === 'browser') {
515
+ // Navigate + screenshot via CDP on agent browser (port 9223)
516
+ return await this._cdpScreenshot(args.url, tmpFile);
517
+ } else {
518
+ // Full screen capture
519
+ await execAsync(`screencapture -x "${tmpFile}"`);
520
+ const data = readFileSync(tmpFile).toString('base64');
521
+ try { await execAsync(`rm -f "${tmpFile}"`); } catch {}
522
+ return `data:image/png;base64,${data}`;
523
+ }
524
+ }
525
+
437
526
  default:
438
527
  return `Unknown tool: ${name}`;
439
528
  }
@@ -442,6 +531,69 @@ export class OllamaAgent extends EventEmitter {
442
531
  }
443
532
  }
444
533
 
534
+ // ─── CDP browser screenshot ───────────────────────────────────────────────
535
+
536
+ async _cdpScreenshot(navigateUrl, tmpFile) {
537
+ const CDP_PORT = 9223;
538
+ let tabId;
539
+
540
+ // Get or create a tab
541
+ const tabsRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json`);
542
+ const tabs = await tabsRes.json();
543
+ const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
544
+
545
+ if (!usable) {
546
+ // Create new tab
547
+ const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
548
+ const newTabData = await newTab.json();
549
+ tabId = newTabData.id;
550
+ } else {
551
+ tabId = usable.id;
552
+ }
553
+
554
+ return new Promise((resolve, reject) => {
555
+ // Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
556
+ const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
557
+ let msgId = 1;
558
+ const pending = new Map();
559
+
560
+ const send = (method, params = {}) => new Promise((res, rej) => {
561
+ const id = msgId++;
562
+ pending.set(id, { resolve: res, reject: rej });
563
+ ws.send(JSON.stringify({ id, method, params }));
564
+ });
565
+
566
+ ws.addEventListener('message', (evt) => {
567
+ const msg = JSON.parse(evt.data);
568
+ if (msg.id && pending.has(msg.id)) {
569
+ const { resolve: res, reject: rej } = pending.get(msg.id);
570
+ pending.delete(msg.id);
571
+ if (msg.error) rej(new Error(msg.error.message));
572
+ else res(msg.result);
573
+ }
574
+ });
575
+
576
+ ws.addEventListener('open', async () => {
577
+ try {
578
+ if (navigateUrl) {
579
+ await send('Page.navigate', { url: navigateUrl });
580
+ // Wait for load
581
+ await new Promise(r => setTimeout(r, 3000));
582
+ }
583
+ const { data } = await send('Page.captureScreenshot', { format: 'png' });
584
+ ws.close();
585
+ resolve(`data:image/png;base64,${data}`);
586
+ } catch (err) {
587
+ ws.close();
588
+ reject(err);
589
+ }
590
+ });
591
+
592
+ ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
593
+ setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 20000);
594
+ });
595
+ }
596
+
445
597
  _resolvePath(p, workDir) {
446
598
  return path.isAbsolute(p) ? p : path.join(workDir, p);
447
599
  }
@@ -459,6 +611,8 @@ export class OllamaAgent extends EventEmitter {
459
611
  case 'web_fetch': {
460
612
  try { return `Fetching ${new URL(args.url).hostname}`; } catch { return 'Fetching URL'; }
461
613
  }
614
+ case 'take_screenshot':
615
+ return `Screenshot: ${args.url || args.target}`;
462
616
  default:
463
617
  return name;
464
618
  }
package/src/supervisor.js CHANGED
@@ -39,11 +39,18 @@ function removePid(file) {
39
39
  export async function runSupervisor(innerArgv) {
40
40
  writePid(PID_FILE, process.pid);
41
41
 
42
+ // SIGTERM on supervisor = intentional stop (from agentforge stop command)
42
43
  process.on('SIGTERM', () => {
43
44
  console.log('[supervisor] Received SIGTERM — shutting down');
44
45
  removePid(PID_FILE);
45
46
  process.exit(0);
46
47
  });
48
+ // SIGINT = Ctrl+C in foreground terminal = intentional stop
49
+ process.on('SIGINT', () => {
50
+ console.log('[supervisor] Received SIGINT — shutting down');
51
+ removePid(PID_FILE);
52
+ process.exit(0);
53
+ });
47
54
 
48
55
  let consecutiveCrashes = 0;
49
56
 
package/src/worker.js CHANGED
@@ -1756,12 +1756,12 @@ Review and add specific steps, pitfalls, and patterns that helped succeed.
1756
1756
  });
1757
1757
  }
1758
1758
 
1759
- async shutdown() {
1759
+ async shutdown(code = 1) {
1760
1760
  console.log('🛑 Shutting down worker...');
1761
1761
  if (this.ws) {
1762
1762
  this.ws.close();
1763
1763
  }
1764
- process.exit(0);
1764
+ process.exit(code);
1765
1765
  }
1766
1766
 
1767
1767
  // Find the AgentForge git repo root, regardless of whether worker is globally installed or run from source