npm - nothumanallowed - Versions diffs - 9.8.1 → 9.8.2 - Mend

nothumanallowed 9.8.1 → 9.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nothumanallowed",
-  "version": "9.8.1",
+  "version": "9.8.2",
   "description": "NotHumanAllowed — 38 AI agents, 53 tools. Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, GitHub, Notion, Slack, voice chat, 28 languages. Zero-dependency CLI.",
   "type": "module",
   "bin": {

package/src/commands/chat.mjs CHANGED Viewed

@@ -718,12 +718,31 @@ export async function cmdChat(args) {
           const result = await executeTool(action, params, config);
           process.stdout.write('\r' + ' '.repeat(80) + '\r');
-          // Show action-specific result header
-          const resultHeader = formatToolResult(action, params, result);
-          console.log(`  ${resultHeader}`);
-          console.log(`  ${result.split('\n').join('\n  ')}\n`);
-          addMessages(conv, input, response + `\n\n[Tool ${action} executed. Result: ${result}]`);
+          // Handle screen capture vision result
+          if (result && typeof result === 'object' && result.__screenshot) {
+            console.log(`  ${G}Screenshot captured${NC} — analyzing with vision...\n`);
+            try {
+              const { callLLMVision } = await import('../services/llm.mjs');
+              const visionResponse = await callLLMVision(config,
+                'Describe EXACTLY and ONLY what you see in this screenshot. NEVER invent or fabricate details.',
+                `The user said: "${input}"\n\n${result.question}`,
+                { base64: result.base64, mimeType: 'image/png' }
+              );
+              console.log(`  ${visionResponse.split('\n').join('\n  ')}\n`);
+              addMessages(conv, input, response + `\n\n[Screenshot: ${result.path}]\n${visionResponse}`);
+            } catch (visionErr) {
+              console.log(`  ${R}Vision failed: ${visionErr.message}${NC}\n`);
+              addMessages(conv, input, response + `\n\n[Screenshot captured but vision failed: ${visionErr.message}]`);
+            }
+          } else {
+            // Show action-specific result header
+            const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
+            const resultHeader = formatToolResult(action, params, resultStr);
+            console.log(`  ${resultHeader}`);
+            console.log(`  ${resultStr.split('\n').join('\n  ')}\n`);
+            addMessages(conv, input, response + `\n\n[Tool ${action} executed. Result: ${resultStr}]`);
+          }
         } catch (err) {
           process.stdout.write('\r' + ' '.repeat(80) + '\r');
           console.log(`  ${R}Error executing ${action}: ${err.message}${NC}\n`);

package/src/commands/ui.mjs CHANGED Viewed

@@ -1053,18 +1053,49 @@ export async function cmdUI(args) {
           // Execute ALL tool actions and collect results
           const toolResults = [];
+          let screenshotData = null; // For vision: { base64, path, question }
           for (const { action, params } of actions) {
             try {
               const result = await executeTool(action, params, config);
-              toolResults.push({ action, result: typeof result === 'object' ? JSON.stringify(result) : String(result) });
+              // Intercept structured screenshot result for vision flow
+              if (result && typeof result === 'object' && result.__screenshot) {
+                screenshotData = result;
+                toolResults.push({ action, result: 'Screenshot captured. Analyzing with vision...' });
+              } else {
+                toolResults.push({ action, result: typeof result === 'object' ? JSON.stringify(result) : String(result) });
+              }
             } catch (e) {
               toolResults.push({ action, result: `Error: ${e.message}` });
             }
           }
           let fullResponse;
-          if (toolResults.length > 0) {
-            // Second LLM call with real tool results — forces the LLM to use actual data
+          if (screenshotData && screenshotData.base64) {
+            // VISION FLOW: send screenshot to LLM as image via callLLMVision
+            try {
+              const { callLLMVision } = await import('../services/llm.mjs');
+              const visionPrompt = enrichedSystemPrompt + '\n\nIMPORTANT: You are looking at a REAL screenshot from the user\'s screen. Describe ONLY what you ACTUALLY see. NEVER invent, guess, or fabricate details. If something is unclear, say so.';
+              const question = `The user said: "${body.message}"\n\n${screenshotData.question}\n\nDescribe ONLY what you see. NEVER make up information.`;
+              fullResponse = await callLLMVision(config, visionPrompt, question, {
+                base64: screenshotData.base64,
+                mimeType: 'image/png',
+              });
+            } catch (visionErr) {
+              fullResponse = `I captured a screenshot but vision analysis failed: ${visionErr.message}. To use screen analysis, configure a vision-capable provider (Claude, GPT-4, Gemini).`;
+            }
+            // Prepend screenshot file marker for the UI to display inline
+            if (screenshotData.path) {
+              const fname = screenshotData.path.split('/').pop();
+              // Copy to NHA screenshots dir for persistence
+              try {
+                const ssDir = path.join(NHA_DIR, 'screenshots');
+                fs.mkdirSync(ssDir, { recursive: true });
+                fs.copyFileSync(screenshotData.path, path.join(ssDir, fname));
+              } catch {}
+              fullResponse = `![Screenshot](/api/screenshots/${fname})\n\n${fullResponse}`;
+            }
+          } else if (toolResults.length > 0) {
+            // Standard tool results flow
             const toolContext = toolResults.map(t => {
               let clean = t.result.replace(/\[Screenshot[^\]]*\]/g, '').replace(/!\[.*?\]\(data:image[^)]+\)/g, '').slice(0, 3000);
               return `[${t.action} result]: ${clean.trim()}`;
@@ -1073,7 +1104,6 @@ export async function cmdUI(args) {
             try {
               fullResponse = await callLLM(config, enrichedSystemPrompt, followUp);
             } catch {
-              // Fallback: show raw results
               fullResponse = toolResults.map(t => `${t.action}: ${t.result}`).join('\n\n');
             }
           } else {
@@ -1325,6 +1355,40 @@ export async function cmdUI(args) {
               }
               const result = await executeTool(action, params, config);
+              // Intercept screen capture vision result
+              if (result && typeof result === 'object' && result.__screenshot) {
+                sendSSE('tool', { action, status: 'analyzing_screen' });
+                try {
+                  const { callLLMVision } = await import('../services/llm.mjs');
+                  const visionPrompt = enrichedPrompt + '\\n\\nIMPORTANT: You are looking at a REAL screenshot. Describe ONLY what you ACTUALLY see. NEVER invent or fabricate.';
+                  const question = `The user said: "${msg}"\\n\\n${result.question}`;
+                  const visionResponse = await callLLMVision(config, visionPrompt, question, { base64: result.base64, mimeType: 'image/png' });
+                  // Save screenshot for display
+                  const fname = result.path.split('/').pop();
+                  const ssDir = path.join(NHA_DIR, 'screenshots');
+                  fs.mkdirSync(ssDir, { recursive: true });
+                  try { fs.copyFileSync(result.path, path.join(ssDir, fname)); } catch {}
+                  // Send screenshot to client
+                  sendSSE('screenshot', { filename: fname, format: 'png' });
+                  if (!res._screenshotFiles) res._screenshotFiles = [];
+                  res._screenshotFiles.push(fname);
+                  // Send vision analysis as tokens
+                  sendSSE('tool_synthesis', {});
+                  sendSSE('token', { content: visionResponse });
+                  toolResults.push({ action, result: `[Screen analyzed] ${visionResponse.slice(0, 500)}` });
+                  sendSSE('tool', { action, status: 'done', result: 'Screen analyzed' });
+                } catch (visionErr) {
+                  toolResults.push({ action, result: `Vision failed: ${visionErr.message}` });
+                  sendSSE('tool', { action, status: 'error', error: visionErr.message });
+                }
+                continue;
+              }
               const resultStr = typeof result === 'object' ? JSON.stringify(result) : String(result);
               toolResults.push({ action, result: resultStr });
               sendSSE('tool', { action, status: 'done', result: typeof resultStr === 'string' ? resultStr.slice(0, 500) : '' });

package/src/constants.mjs CHANGED Viewed

@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-export const VERSION = '9.8.1';
+export const VERSION = '9.8.2';
 export const BASE_URL = 'https://nothumanallowed.com/cli';
 export const API_BASE = 'https://nothumanallowed.com/api/v1';

package/src/services/llm.mjs CHANGED Viewed

@@ -275,6 +275,96 @@ export async function callLLM(config, systemPrompt, userMessage, opts = {}) {
   return callFn(apiKey, model, systemPrompt, userMessage, false);
 }
+/**
+ * Call LLM with multimodal (vision) messages — supports image + PDF content.
+ * Unified function for ALL vision calls (web UI, CLI, screen capture).
+ * @param {object} config
+ * @param {string} systemPrompt
+ * @param {string} userMessage — text question about the image
+ * @param {object} media — { base64, mimeType } (image/png, image/jpeg, application/pdf)
+ * @returns {Promise<string>}
+ */
+export async function callLLMVision(config, systemPrompt, userMessage, media) {
+  const provider = config.llm.provider || 'anthropic';
+  const model = config.llm.model || null;
+  const apiKey = getApiKey(config, provider);
+  if (!apiKey) throw new Error(`No API key for ${provider}. Vision requires Claude, GPT-4, or Gemini.`);
+  const { base64, mimeType } = media;
+  if (!base64 || !mimeType) throw new Error('media.base64 and media.mimeType are required');
+  if (provider === 'anthropic') {
+    const isPdf = mimeType === 'application/pdf';
+    const contentBlock = isPdf
+      ? { type: 'document', source: { type: 'base64', media_type: mimeType, data: base64 } }
+      : { type: 'image', source: { type: 'base64', media_type: mimeType, data: base64 } };
+    const body = {
+      model: model || 'claude-sonnet-4-20250514',
+      max_tokens: 4096,
+      system: systemPrompt,
+      messages: [{ role: 'user', content: [contentBlock, { type: 'text', text: userMessage }] }],
+    };
+    const res = await fetch('https://api.anthropic.com/v1/messages', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
+      body: JSON.stringify(body),
+    });
+    if (!res.ok) throw new Error(`Anthropic vision ${res.status}: ${(await res.text()).slice(0, 300)}`);
+    const data = await res.json();
+    return data.content?.[0]?.text || '';
+  }
+  if (provider === 'openai' || provider === 'deepseek' || provider === 'grok' || provider === 'mistral') {
+    const url = provider === 'openai' ? 'https://api.openai.com/v1/chat/completions'
+              : provider === 'deepseek' ? 'https://api.deepseek.com/v1/chat/completions'
+              : provider === 'grok' ? 'https://api.x.ai/v1/chat/completions'
+              : 'https://api.mistral.ai/v1/chat/completions';
+    const body = {
+      model: model || (provider === 'openai' ? 'gpt-4o' : model),
+      max_tokens: 4096,
+      messages: [
+        { role: 'system', content: systemPrompt },
+        { role: 'user', content: [
+          { type: 'image_url', image_url: { url: `data:${mimeType};base64,${base64}` } },
+          { type: 'text', text: userMessage },
+        ] },
+      ],
+    };
+    const res = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
+      body: JSON.stringify(body),
+    });
+    if (!res.ok) throw new Error(`${provider} vision ${res.status}: ${(await res.text()).slice(0, 300)}`);
+    const data = await res.json();
+    return data.choices?.[0]?.message?.content || '';
+  }
+  if (provider === 'gemini') {
+    const geminiModel = model || 'gemini-2.0-flash';
+    const body = {
+      contents: [{ parts: [
+        { inline_data: { mime_type: mimeType, data: base64 } },
+        { text: userMessage },
+      ] }],
+      systemInstruction: { parts: [{ text: systemPrompt }] },
+      generationConfig: { maxOutputTokens: 4096 },
+    };
+    const res = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${geminiModel}:generateContent?key=${apiKey}`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+    });
+    if (!res.ok) throw new Error(`Gemini vision ${res.status}: ${(await res.text()).slice(0, 300)}`);
+    const data = await res.json();
+    return data.candidates?.[0]?.content?.parts?.[0]?.text || '';
+  }
+  throw new Error(`Vision not supported for provider: ${provider}. Use Claude, GPT-4, or Gemini.`);
+}
 /**
  * Call an LLM provider with streaming enabled.
  * Calls onToken(chunk) for each token, returns full text at the end.