npm - @hamp10/agentforge - Versions diffs - 0.2.1 → 0.2.3 - Mend

@hamp10/agentforge 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/bin/agentforge.js CHANGED Viewed

@@ -274,9 +274,9 @@ program
     const worker = new AgentForgeWorker(config.token, wsUrl, config);
     // Graceful shutdown
-    process.on('SIGINT', () => { console.log('\n[SIGINT received]'); worker.shutdown(); });
-    process.on('SIGTERM', () => { console.log('\n[SIGTERM received]'); worker.shutdown(); });
-    process.on('SIGHUP', () => { console.log('\n[SIGHUP received — terminal closed]'); worker.shutdown(); });
+    process.on('SIGINT',  () => { console.log('\n[SIGINT received — stopping]');   worker.shutdown(0); }); // Ctrl+C: clean stop
+    process.on('SIGTERM', () => { console.log('\n[SIGTERM received — restarting]'); worker.shutdown(1); }); // kill: supervisor restarts
+    process.on('SIGHUP',  () => { console.log('\n[SIGHUP received — restarting]');  worker.shutdown(1); }); // terminal close: supervisor restarts
     try {
       await worker.initialize();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hamp10/agentforge",
-  "version": "0.2.1",
+  "version": "0.2.3",
   "description": "AgentForge worker — connect your machine to agentforge.ai",
   "type": "module",
   "bin": {

package/src/OllamaAgent.js CHANGED Viewed

@@ -79,6 +79,28 @@ const TOOLS = [
         required: ['url']
       }
     }
+  },
+  {
+    type: 'function',
+    function: {
+      name: 'take_screenshot',
+      description: 'Take a screenshot of the current screen or the agent browser (port 9223). Returns base64 image data you can analyze visually. Use this to check what a webpage looks like, verify a build result, or monitor a running process.',
+      parameters: {
+        type: 'object',
+        properties: {
+          target: {
+            type: 'string',
+            enum: ['screen', 'browser'],
+            description: 'screen = full screen capture. browser = screenshot of the agent browser (port 9223).'
+          },
+          url: {
+            type: 'string',
+            description: 'Optional: navigate the browser to this URL before taking the screenshot.'
+          }
+        },
+        required: ['target']
+      }
+    }
   }
 ];
@@ -146,26 +168,42 @@ export class OllamaAgent extends EventEmitter {
     console.log(`   Task: ${task}`);
     console.log(`   Working dir: ${workDir}`);
+    // Detect model capabilities
+    const isQwen3    = this.model.startsWith('qwen3');
+    const isVision   = /vl|vision|llava|minicpm-v|moondream/i.test(this.model);
     try {
       // Load conversation history from disk (session persistence)
       const history = this._loadHistory(agentId, workDir, sessionId);
+      const systemPrompt = [
+        `You are an AI agent running on AgentForge.ai.`,
+        `Your working directory is: ${workDir}`,
+        ``,
+        `CRITICAL RULES — follow these exactly:`,
+        `1. Use the provided tools to complete the task. Do NOT write Python code, pseudo-code, or code blocks to simulate tool calls.`,
+        `2. To run a command, call the "bash" tool. To read a file, call "read_file". To write, call "write_file". To take a screenshot, call "take_screenshot".`,
+        `3. Every action must be a real tool call — not described in text, not shown as code.`,
+        `4. When you take a screenshot, you will receive the actual image back and can see it.`,
+        `5. When you are done, write a clear summary of what you accomplished.`,
+        `6. Do not ask for clarification — make your best judgment and act.`,
+      ].join('\n');
       const messages = [
-        {
-          role: 'system',
-          content: [
-            `You are an AI agent running on AgentForge.ai.`,
-            `Your working directory is: ${workDir}`,
-            `Use the available tools to complete the task autonomously.`,
-            `When you are done, write a clear summary of what you accomplished.`,
-            `Do not ask for clarification — make your best judgment and act.`
-          ].join('\n')
-        },
+        { role: 'system', content: systemPrompt },
         ...history,
-        { role: 'user', content: task }
       ];
+      // Attach initial image to user message if provided
+      const userMessage = { role: 'user', content: task };
+      if (image && isVision) {
+        const base64 = image.replace(/^data:image\/\w+;base64,/, '');
+        userMessage.images = [base64];
+      }
+      messages.push(userMessage);
       let finalContent = '';
+      let allOutput = ''; // accumulate everything streamed across all turns
       const MAX_TURNS = 25;
       for (let turn = 0; turn < MAX_TURNS; turn++) {
@@ -175,18 +213,25 @@ export class OllamaAgent extends EventEmitter {
         let response;
         try {
-          // OpenAI-compatible endpoint — works with Ollama, LM Studio, Jan, llama.cpp, vLLM, etc.
+          const requestBody = {
+            model: this.model,
+            messages,
+            tools: TOOLS,
+            tool_choice: 'auto',
+            stream: true,
+          };
+          // Disable thinking mode for qwen3 — prevents 3-minute silent think phases
+          // and makes tool-call JSON output reliable.
+          if (isQwen3) {
+            requestBody.options = { think: false };
+          }
           response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
             method: 'POST',
             headers: { 'Content-Type': 'application/json' },
             signal: controller.signal,
-            body: JSON.stringify({
-              model: this.model,
-              messages,
-              tools: TOOLS,
-              tool_choice: 'auto',
-              stream: true
-            })
+            body: JSON.stringify(requestBody)
           });
         } catch (fetchErr) {
           if (fetchErr.name === 'AbortError') break;
@@ -272,6 +317,7 @@ export class OllamaAgent extends EventEmitter {
               thinkBuffer = inThinkBlock ? thinkBuffer.slice(thinkBuffer.lastIndexOf('<think>')) : '';
               streamContent += out;
+              allOutput += out;
               if (out) {
                 this.emit('agent_output', { agentId, output: out });
               }
@@ -321,7 +367,28 @@ export class OllamaAgent extends EventEmitter {
               description: `✓ ${name}`
             });
-            messages.push({ role: 'tool', content: String(result) });
+            // If the tool returned an image (base64), push it as a vision message
+            // so the model can actually see what was captured.
+            const isImageResult = typeof result === 'string' && result.startsWith('data:image/');
+            if (isImageResult && isVision) {
+              messages.push({
+                role: 'tool',
+                tool_call_id: toolCall.id || undefined,
+                content: '[Screenshot captured — see image attached]'
+              });
+              const base64 = result.replace(/^data:image\/\w+;base64,/, '');
+              messages.push({
+                role: 'user',
+                content: 'Here is the screenshot:',
+                images: [base64]
+              });
+            } else {
+              messages.push({
+                role: 'tool',
+                tool_call_id: toolCall.id || undefined,
+                content: isImageResult ? '[Screenshot captured — install a vision model to analyze images]' : String(result)
+              });
+            }
           }
           // Loop back — model will respond to the tool results
           continue;
@@ -332,6 +399,12 @@ export class OllamaAgent extends EventEmitter {
           finalContent = streamContent;
         }
         break;
+      }
+      // Use all accumulated output if final turn had no content (agent ended after tool calls)
+      if (!finalContent && allOutput) {
+        finalContent = allOutput;
       }
       // Persist history for next task
@@ -434,6 +507,22 @@ export class OllamaAgent extends EventEmitter {
           return text.slice(0, 4000) + (text.length > 4000 ? '\n...(truncated)' : '');
         }
+        case 'take_screenshot': {
+          const target = args.target || 'screen';
+          const tmpFile = `/tmp/af_screenshot_${Date.now()}.png`;
+          if (target === 'browser') {
+            // Navigate + screenshot via CDP on agent browser (port 9223)
+            return await this._cdpScreenshot(args.url, tmpFile);
+          } else {
+            // Full screen capture
+            await execAsync(`screencapture -x "${tmpFile}"`);
+            const data = readFileSync(tmpFile).toString('base64');
+            try { await execAsync(`rm -f "${tmpFile}"`); } catch {}
+            return `data:image/png;base64,${data}`;
+          }
+        }
         default:
           return `Unknown tool: ${name}`;
       }
@@ -442,6 +531,69 @@ export class OllamaAgent extends EventEmitter {
     }
   }
+  // ─── CDP browser screenshot ───────────────────────────────────────────────
+  async _cdpScreenshot(navigateUrl, tmpFile) {
+    const CDP_PORT = 9223;
+    let tabId;
+    // Get or create a tab
+    const tabsRes = await fetch(`http://127.0.0.1:${CDP_PORT}/json`);
+    const tabs = await tabsRes.json();
+    const usable = tabs.find(t => t.type === 'page' && t.webSocketDebuggerUrl);
+    if (!usable) {
+      // Create new tab
+      const newTab = await fetch(`http://127.0.0.1:${CDP_PORT}/json/new`, { method: 'PUT' });
+      const newTabData = await newTab.json();
+      tabId = newTabData.id;
+    } else {
+      tabId = usable.id;
+    }
+    return new Promise((resolve, reject) => {
+      // Inline WebSocket CDP — no ws package dependency needed (Node 22 has WebSocket built in)
+      const ws = new WebSocket(`ws://127.0.0.1:${CDP_PORT}/devtools/page/${tabId}`);
+      let msgId = 1;
+      const pending = new Map();
+      const send = (method, params = {}) => new Promise((res, rej) => {
+        const id = msgId++;
+        pending.set(id, { resolve: res, reject: rej });
+        ws.send(JSON.stringify({ id, method, params }));
+      });
+      ws.addEventListener('message', (evt) => {
+        const msg = JSON.parse(evt.data);
+        if (msg.id && pending.has(msg.id)) {
+          const { resolve: res, reject: rej } = pending.get(msg.id);
+          pending.delete(msg.id);
+          if (msg.error) rej(new Error(msg.error.message));
+          else res(msg.result);
+        }
+      });
+      ws.addEventListener('open', async () => {
+        try {
+          if (navigateUrl) {
+            await send('Page.navigate', { url: navigateUrl });
+            // Wait for load
+            await new Promise(r => setTimeout(r, 3000));
+          }
+          const { data } = await send('Page.captureScreenshot', { format: 'png' });
+          ws.close();
+          resolve(`data:image/png;base64,${data}`);
+        } catch (err) {
+          ws.close();
+          reject(err);
+        }
+      });
+      ws.addEventListener('error', (err) => reject(new Error(`CDP WebSocket error: ${err.message}`)));
+      setTimeout(() => { ws.close(); reject(new Error('CDP screenshot timeout')); }, 20000);
+    });
+  }
   // Return `p` unchanged when it is an absolute path; otherwise join it onto `workDir`.
   _resolvePath(p, workDir) {
     return path.isAbsolute(p) ? p : path.join(workDir, p);
   }
@@ -459,6 +611,8 @@ export class OllamaAgent extends EventEmitter {
       case 'web_fetch': {
         try { return `Fetching ${new URL(args.url).hostname}`; } catch { return 'Fetching URL'; }
       }
+      case 'take_screenshot':
+        return `Screenshot: ${args.url || args.target}`;
       default:
         return name;
     }

package/src/supervisor.js CHANGED Viewed

@@ -39,11 +39,18 @@ function removePid(file) {
 export async function runSupervisor(innerArgv) {
   writePid(PID_FILE, process.pid);
+  // SIGTERM on supervisor = intentional stop (from agentforge stop command)
   process.on('SIGTERM', () => {
     console.log('[supervisor] Received SIGTERM — shutting down');
     removePid(PID_FILE);
     process.exit(0);
   });
+  // SIGINT = Ctrl+C in foreground terminal = intentional stop
+  process.on('SIGINT', () => {
+    console.log('[supervisor] Received SIGINT — shutting down');
+    removePid(PID_FILE);
+    process.exit(0);
+  });
   let consecutiveCrashes = 0;

package/src/worker.js CHANGED Viewed

@@ -1756,12 +1756,12 @@ Review and add specific steps, pitfalls, and patterns that helped succeed.
     });
   }
-  async shutdown() {
+  async shutdown(code = 1) {
     // Diff view: '-' lines are the 0.2.1 version, '+' lines are the 0.2.3 version.
     // 0.2.3 adds an optional exit code (default 1) that is passed to process.exit;
     // 0.2.1 always exited with 0.
     console.log('🛑 Shutting down worker...');
     if (this.ws) {
       // Close the connection held in this.ws before exiting.
       this.ws.close();
     }
-    process.exit(0);
+    process.exit(code);
   }
   // Find the AgentForge git repo root, regardless of whether worker is globally installed or run from source