npm - nothumanallowed - Versions diffs - 9.6.0 → 9.7.0 - Mend

nothumanallowed 9.6.0 → 9.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +1 -1
package/src/commands/ui.mjs +37 -7
package/src/constants.mjs +1 -1
package/src/services/llm.mjs +107 -0
package/src/services/tool-executor.mjs +10 -17
package/src/services/web-ui.mjs +66 -18

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nothumanallowed",
-  "version": "9.6.0",
+  "version": "9.7.0",
   "description": "NotHumanAllowed — 38 AI agents, 53 tools. Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, GitHub, Notion, Slack, voice chat, 28 languages. Zero-dependency CLI.",
   "type": "module",
   "bin": {

package/src/commands/ui.mjs CHANGED Viewed

@@ -15,7 +15,7 @@ import fs from 'fs';
 import path from 'path';
 import { loadConfig } from '../config.mjs';
 import { detectMailProvider, hasMailProvider, getProviderStatus } from '../services/mail-router.mjs';
-import { callLLM, callAgent, parseAgentFile } from '../services/llm.mjs';
+import { callLLM, callLLMVision, callAgent, parseAgentFile } from '../services/llm.mjs';
 import { getUnreadImportant, getMessage, listMessages, sendEmail, createDraft } from '../services/mail-router.mjs';
 import { getTodayEvents, getUpcomingEvents, createEvent, updateEvent, getEventsForDate } from '../services/mail-router.mjs';
 import {
@@ -158,7 +158,12 @@ export async function cmdUI(args) {
   const UI_PERSONA = `You are NHA Chat, a personal operations assistant inside the NotHumanAllowed web UI. ` +
     `You help the user manage their emails, calendar, tasks, GitHub issues, Notion pages, and Slack channels through natural conversation. ` +
     `Be concise, helpful, and proactive. When presenting data, format it clearly. ` +
-    `Never output raw JSON to the user.`;
+    `Never output raw JSON to the user.\n\n` +
+    `ABSOLUTE RULE — NEVER LIE: You MUST ALWAYS tell the truth. NEVER fabricate, invent, or guess information. ` +
+    `If you don't know something, say "I don't know." If a tool fails, say it failed. If you cannot see something, say you cannot see it. ` +
+    `If you receive a screenshot but cannot analyze it (no vision support), say so honestly. ` +
+    `NEVER describe things you haven't actually seen or data you haven't actually received. ` +
+    `Honesty is MORE important than being helpful. A truthful "I don't know" is ALWAYS better than a fabricated answer.`;
   const chatSystemPrompt = buildSystemPrompt('NHA UI', UI_PERSONA, config);
   // ── Route Handlers ──────────────────────────────────────────────────────
@@ -675,24 +680,49 @@ export async function cmdUI(args) {
           // Execute ALL tool actions and collect results
           const toolResults = [];
+          let screenshotData = null; // For vision: { base64, path, question }
+          let screenshotFiles = [];  // For displaying inline
           for (const { action, params } of actions) {
             try {
               const result = await executeTool(action, params, config);
-              toolResults.push({ action, result: typeof result === 'object' ? JSON.stringify(result) : String(result) });
+              // Check if result is a structured screenshot object
+              if (result && typeof result === 'object' && result.__screenshot) {
+                screenshotData = result;
+                screenshotFiles.push(result.path);
+                toolResults.push({ action, result: 'Screenshot captured. Analyzing with vision...' });
+              } else {
+                toolResults.push({ action, result: typeof result === 'object' ? JSON.stringify(result) : String(result) });
+              }
             } catch (e) {
               toolResults.push({ action, result: `Error: ${e.message}` });
             }
           }
           let fullResponse;
-          if (toolResults.length > 0) {
-            // Second LLM call with real tool results — forces the LLM to use actual data
+          if (screenshotData && screenshotData.base64) {
+            // VISION FLOW: send screenshot image to LLM as multimodal content
+            try {
+              const visionMessages = [
+                { role: 'system', content: enrichedSystemPrompt + '\n\nIMPORTANT: You are looking at a REAL screenshot from the user\'s screen. Describe ONLY what you ACTUALLY see. NEVER invent, guess, or fabricate details. If something is unclear, say so. Be specific about windows, text, UI elements you can identify.' },
+                { role: 'user', content: [
+                  { type: 'image_url', image_url: { url: `data:image/png;base64,${screenshotData.base64}` } },
+                  { type: 'text', text: `The user said: "${body.message}"\n\n${screenshotData.question}\n\nDescribe ONLY what you see. NEVER make up information.` },
+                ] },
+              ];
+              fullResponse = await callLLMVision(config, visionMessages);
+            } catch (visionErr) {
+              // Fallback: try regular call explaining we can't do vision
+              fullResponse = `I captured a screenshot but your current LLM provider doesn't support vision/image analysis. The screenshot is saved at: ${screenshotData.path}\n\nTo use screen analysis, configure a vision-capable provider (Claude, GPT-4, Gemini).`;
+            }
+            // Prepend screenshot file marker for the UI to display
+            fullResponse = `[SCREENSHOT_FILE]${screenshotData.path}[/SCREENSHOT_FILE]\n${fullResponse}`;
+          } else if (toolResults.length > 0) {
+            // Standard tool results flow
             const toolContext = toolResults.map(t => `[${t.action} result]: ${t.result}`).join('\n\n');
             const followUp = `The user asked: "${body.message}"\n\nI executed these tools and got REAL results:\n\n${toolContext}\n\nNow respond to the user based ONLY on the REAL data above. Do NOT invent or fabricate any information. Present the actual results clearly.`;
             try {
               fullResponse = await callLLM(config, enrichedSystemPrompt, followUp);
             } catch {
-              // Fallback: show raw results
               fullResponse = toolResults.map(t => `${t.action}: ${t.result}`).join('\n\n');
             }
           } else {
@@ -708,7 +738,7 @@ export async function cmdUI(args) {
           } catch { /* non-critical */ }
           try { extractMemory('chat', body.message, fullResponse); } catch { /* non-critical */ }
-          sendJSON(res, 200, { response: fullResponse, toolResults, actions });
+          sendJSON(res, 200, { response: fullResponse, toolResults, actions, screenshotFiles });
         } catch (e) {
           sendJSON(res, 200, { response: null, error: e.message });
         }

package/src/constants.mjs CHANGED Viewed

@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-export const VERSION = '9.6.0';
+export const VERSION = '9.7.0';
 export const BASE_URL = 'https://nothumanallowed.com/cli';
 export const API_BASE = 'https://nothumanallowed.com/api/v1';

package/src/services/llm.mjs CHANGED Viewed

@@ -275,6 +275,113 @@ export async function callLLM(config, systemPrompt, userMessage, opts = {}) {
   return callFn(apiKey, model, systemPrompt, userMessage, false);
 }
+/**
+ * Call the configured LLM with multimodal (vision) messages.
+ *
+ * Messages use the OpenAI chat shape: `{ role, content }`, where `content` is
+ * either a string or an array of `{ type: 'text', text }` /
+ * `{ type: 'image_url', image_url: { url } }` blocks. They are forwarded
+ * unchanged to OpenAI-compatible providers and translated to the request
+ * format built below for Anthropic and Gemini.
+ *
+ * @param {object} config - Reads `config.llm.provider` (defaults to 'anthropic') and `config.llm.model`.
+ * @param {Array} messages - Array of { role, content } where content can be string or array of content blocks
+ * @returns {Promise<string>} All text of the reply concatenated, or '' when the reply carries no text.
+ * @throws {Error} When no API key is available, when an OpenAI-compatible provider other than
+ *   'openai' has no model configured, when the provider has no vision branch here, or when the
+ *   HTTP response is not ok (message contains the status code and response body).
+ */
+export async function callLLMVision(config, messages) {
+  const provider = config.llm.provider || 'anthropic';
+  const model = config.llm.model || null;
+  const apiKey = getApiKey(config, provider);
+  if (!apiKey) throw new Error(`No API key for ${provider}. Vision requires Claude, GPT-4, or Gemini.`);
+  // Shared POST helper: sends JSON, throws `<label> vision <status>: <body>` on a non-ok response.
+  const postJSON = async (label, url, headers, body) => {
+    const res = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', ...headers },
+      body: JSON.stringify(body),
+    });
+    if (!res.ok) throw new Error(`${label} vision ${res.status}: ${await res.text()}`);
+    return res.json();
+  };
+  if (provider === 'anthropic') {
+    // Anthropic format: system separate, messages with content blocks
+    const systemMsg = messages.find(m => m.role === 'system');
+    const userMsgs = messages.filter(m => m.role !== 'system');
+    // Convert OpenAI-style image_url to Anthropic format
+    const anthropicMessages = userMsgs.map(m => {
+      if (typeof m.content === 'string') return m;
+      const blocks = m.content.map(block => {
+        if (block.type !== 'image_url') return block;
+        const match = block.image_url.url.match(/^data:image\/(png|jpeg|gif|webp);base64,(.+)$/);
+        // Anything that is not a base64 data URL of a listed type is passed through untouched.
+        if (!match) return block;
+        return { type: 'image', source: { type: 'base64', media_type: `image/${match[1]}`, data: match[2] } };
+      });
+      return { role: m.role, content: blocks };
+    });
+    const data = await postJSON(
+      'Anthropic',
+      'https://api.anthropic.com/v1/messages',
+      { 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
+      {
+        model: model || 'claude-sonnet-4-20250514',
+        max_tokens: 4096,
+        system: systemMsg?.content || '',
+        messages: anthropicMessages,
+      },
+    );
+    // Join every text block instead of reading only content[0], so a reply split
+    // across several blocks is not truncated.
+    return Array.isArray(data.content)
+      ? data.content.filter(b => b && b.type === 'text').map(b => b.text || '').join('')
+      : '';
+  }
+  // OpenAI-compatible chat-completions endpoints, keyed by provider name.
+  const openAICompatibleUrls = new Map([
+    ['openai', 'https://api.openai.com/v1/chat/completions'],
+    ['deepseek', 'https://api.deepseek.com/chat/completions'],
+    ['grok', 'https://api.x.ai/v1/chat/completions'],
+    ['mistral', 'https://api.mistral.ai/v1/chat/completions'],
+  ]);
+  if (openAICompatibleUrls.has(provider)) {
+    // Only 'openai' has a built-in default model. For the other providers fail fast
+    // with a clear message rather than sending `model: null` to the API.
+    const visionModel = model || (provider === 'openai' ? 'gpt-4o' : null);
+    if (!visionModel) {
+      throw new Error(`No model configured for ${provider}. Set a vision-capable model in the LLM config.`);
+    }
+    const data = await postJSON(
+      provider,
+      openAICompatibleUrls.get(provider),
+      { 'Authorization': `Bearer ${apiKey}` },
+      { model: visionModel, max_tokens: 4096, messages },
+    );
+    return data.choices?.[0]?.message?.content || '';
+  }
+  if (provider === 'gemini') {
+    // Gemini format — inline_data with base64
+    const systemMsg = messages.find(m => m.role === 'system');
+    const userMsgs = messages.filter(m => m.role !== 'system');
+    // All non-system messages are flattened into the parts of a single content entry.
+    const parts = [];
+    for (const msg of userMsgs) {
+      if (typeof msg.content === 'string') {
+        parts.push({ text: msg.content });
+        continue;
+      }
+      for (const block of msg.content) {
+        if (block.type === 'text') parts.push({ text: block.text });
+        if (block.type === 'image_url') {
+          const match = block.image_url.url.match(/^data:image\/(.*?);base64,(.+)$/);
+          if (match) parts.push({ inline_data: { mime_type: `image/${match[1]}`, data: match[2] } });
+        }
+      }
+    }
+    const geminiModel = model || 'gemini-2.0-flash';
+    // The API key travels in the x-goog-api-key header rather than the query string,
+    // so it does not end up in URLs that may be logged.
+    const data = await postJSON(
+      'Gemini',
+      `https://generativelanguage.googleapis.com/v1beta/models/${geminiModel}:generateContent`,
+      { 'x-goog-api-key': apiKey },
+      {
+        contents: [{ parts }],
+        systemInstruction: systemMsg ? { parts: [{ text: systemMsg.content }] } : undefined,
+      },
+    );
+    // Concatenate every text part of the first candidate, not just parts[0].
+    const replyParts = data.candidates?.[0]?.content?.parts;
+    return Array.isArray(replyParts) ? replyParts.map(p => (p && p.text) || '').join('') : '';
+  }
+  throw new Error(`Vision not supported for provider: ${provider}. Use Claude, GPT-4, or Gemini.`);
+}
 /**
  * Call an agent by name — loads the agent file, calls LLM, returns response.
  * No streaming. Used by PAO pipeline for batch agent calls.

package/src/services/tool-executor.mjs CHANGED Viewed

@@ -1060,28 +1060,21 @@ export async function executeTool(action, params, config) {
     }
     // ── Screen Capture + Vision ────────────────────────────────────────
-    case 'screen_capture': {
-      const { captureScreen } = await import('./screen-capture.mjs');
-      const result = captureScreen({ monitor: params.monitor || 1 });
-      if (!result.ok) return `Screen capture failed: ${result.error}`;
-      // Save screenshot to file for the UI to display
-      const screenshotPath = result.path;
-      // Return a compact description + mark that we have an image
-      // The calling layer (chat/ui) will handle displaying the image
-      return `[SCREENSHOT]${screenshotPath}[/SCREENSHOT]\nScreenshot captured successfully. I can see your screen. What would you like me to analyze?`;
-    }
+    case 'screen_capture':
     case 'screen_analyze': {
       const { captureScreen } = await import('./screen-capture.mjs');
       const result = captureScreen({ monitor: params.monitor || 1 });
       if (!result.ok) return `Screen capture failed: ${result.error}`;
-      // For vision analysis, we need to send the image to the LLM
-      // Return the base64 with a marker so the chat layer sends it as a vision message
-      const question = params.question || 'Describe what you see on screen.';
-      return `[VISION_REQUEST]${result.base64}[/VISION_REQUEST]\n[VISION_QUESTION]${question}[/VISION_QUESTION]`;
+      // Return structured result — the chat layer intercepts __screenshot to send as vision
+      const question = params.question || 'Describe EXACTLY and ONLY what you see in this screenshot. Do NOT invent or guess anything.';
+      return {
+        __screenshot: true,
+        path: result.path,
+        base64: result.base64,
+        question,
+        displayText: `[SCREENSHOT_FILE]${result.path}[/SCREENSHOT_FILE]`,
+      };
     }
     // ── Canvas ───────────────────────────────────────────────────────────

package/src/services/web-ui.mjs CHANGED Viewed

@@ -330,30 +330,78 @@ function renderMessages(){
     el.innerHTML='<div class="chat__empty"><div class="chat__empty-title">NHA Chat</div><div>Personal Operations Assistant</div><div class="chat__empty-hint">Try: Show my unread emails / What is on my calendar? / Add a task</div></div>';
     return;
   }
-  var h='';chatHistory.forEach(function(m){
+  var h='';chatHistory.forEach(function(m,idx){
     var content = m.content || '';
-    // Handle canvas render markers
-    var canvasMatch = content.match(/\\[CANVAS_RENDER\\](.*?)\\[\\/CANVAS_RENDER\\]/s);
-    if (canvasMatch) {
-      try { var cd = JSON.parse(canvasMatch[1]); showCanvas(cd.html, cd.title); } catch(e){}
-      content = content.replace(/\\[CANVAS_RENDER\\].*?\\[\\/CANVAS_RENDER\\]/s, '').trim();
-    }
-    if (content.indexOf('[CANVAS_CLEAR]') !== -1) {
-      closeCanvas();
-      content = content.replace(/\\[CANVAS_CLEAR\\].*?\\[\\/CANVAS_CLEAR\\]/s, '').trim();
+    var isAssistant = m.role === 'assistant';
+    var extraHtml = '';
+    if (isAssistant) {
+      // Handle canvas render markers
+      var canvasMatch = content.match(/\[CANVAS_RENDER\]([\s\S]*?)\[\/CANVAS_RENDER\]/);
+      if (canvasMatch) {
+        try { var cd = JSON.parse(canvasMatch[1]); showCanvas(cd.html, cd.title); } catch(e){}
+        content = content.replace(/\[CANVAS_RENDER\][\s\S]*?\[\/CANVAS_RENDER\]/, '').trim();
+      }
+      if (content.indexOf('[CANVAS_CLEAR]') !== -1) {
+        closeCanvas();
+        content = content.replace(/\[CANVAS_CLEAR\][\s\S]*?\[\/CANVAS_CLEAR\]/, '').trim();
+      }
+      // Handle screenshot file markers
+      var ssMatch = content.match(/\[SCREENSHOT_FILE\](.*?)\[\/SCREENSHOT_FILE\]/);
+      if (ssMatch) {
+        var fname = ssMatch[1].split('/').pop();
+        extraHtml = '<img src="/api/screenshots/' + encodeURIComponent(fname) + '" style="max-width:100%;border-radius:8px;margin:8px 0;border:1px solid rgba(0,255,65,0.2)" />';
+        content = content.replace(/\[SCREENSHOT_FILE\].*?\[\/SCREENSHOT_FILE\]/, '').trim();
+      }
     }
-    // Handle screenshot markers
-    var screenshotMatch = content.match(/\\[SCREENSHOT\\](.*?)\\[\\/SCREENSHOT\\]/);
-    if (screenshotMatch) {
-      var imgPath = screenshotMatch[1];
-      content = content.replace(/\\[SCREENSHOT\\].*?\\[\\/SCREENSHOT\\]/, '');
-      content = '<img src="/api/screenshots/' + encodeURIComponent(imgPath.split('/').pop()) + '" style="max-width:100%;border-radius:8px;margin:8px 0" />' + content;
+    var bubbleContent = isAssistant ? extraHtml + esc(content).replace(/\n/g, '<br>') : esc(content).replace(/\n/g, '<br>');
+    // Action buttons for each message
+    var actions = '<div class="msg__actions" style="display:flex;gap:6px;margin-top:4px;opacity:0.3">';
+    actions += '<button onclick="copyMessage('+idx+')" style="background:none;border:none;color:var(--dim);cursor:pointer;font-size:10px;font-family:var(--mono)" title="Copy">Copy</button>';
+    if (isAssistant) {
+      actions += '<button onclick="retryMessage('+idx+')" style="background:none;border:none;color:var(--dim);cursor:pointer;font-size:10px;font-family:var(--mono)" title="Retry">Retry</button>';
+    } else {
+      actions += '<button onclick="editMessage('+idx+')" style="background:none;border:none;color:var(--dim);cursor:pointer;font-size:10px;font-family:var(--mono)" title="Edit">Edit</button>';
     }
-    var bubbleContent = m.role === 'assistant' ? content : esc(content);
-    h+='<div class="msg msg--'+esc(m.role)+'"><div class="msg__label">'+esc(m.role==='user'?'You':'NHA')+'</div><div class="msg__bubble">'+bubbleContent+'</div></div>';
+    actions += '</div>';
+    h+='<div class="msg msg--'+esc(m.role)+'" onmouseenter="this.querySelector(\'.msg__actions\').style.opacity=1" onmouseleave="this.querySelector(\'.msg__actions\').style.opacity=0.3"><div class="msg__label">'+esc(m.role==='user'?'You':'NHA')+'</div><div class="msg__bubble">'+bubbleContent+'</div>'+actions+'</div>';
   });
   el.innerHTML=h;el.scrollTop=el.scrollHeight;
 }
+// Copy the text of chatHistory[idx] to the clipboard with the UI-only markers removed.
+// NOTE(review): the previous renderMessages wrote its marker regexes with doubled
+// backslashes; if this script is emitted from a JS string/template literal, confirm the
+// single-backslash patterns below reach the browser intact.
+function copyMessage(idx){
+  var m=chatHistory[idx];if(!m)return;
+  // Strip the same markers renderMessages removes before display (screenshot, canvas render, canvas clear).
+  var text=(m.content||'')
+    .replace(/\[SCREENSHOT_FILE\].*?\[\/SCREENSHOT_FILE\]/g,'')
+    .replace(/\[CANVAS_RENDER\][\s\S]*?\[\/CANVAS_RENDER\]/g,'')
+    .replace(/\[CANVAS_CLEAR\][\s\S]*?\[\/CANVAS_CLEAR\]/g,'')
+    .trim();
+  // Report failures instead of swallowing them; reuses the 'copy' toast type from the success call.
+  var fail=function(){showToast('copy','Copy failed','Could not access the clipboard',3000)};
+  // navigator.clipboard is undefined outside secure contexts, so guard before calling it.
+  if(!navigator.clipboard||!navigator.clipboard.writeText){fail();return}
+  navigator.clipboard.writeText(text).then(function(){showToast('copy','Copied','Message copied to clipboard',2000)}).catch(fail);
+}
+// Retry the assistant message at idx: re-send the user message just before it and
+// put the new reply in the same position. Does nothing unless idx points at an
+// assistant message that directly follows a user message.
+function retryMessage(idx){
+  var target=chatHistory[idx];
+  if(idx<1||!target||target.role!=='assistant')return;
+  var userMsg=chatHistory[idx-1];
+  if(!userMsg||userMsg.role!=='user')return;
+  // Drop the old reply and persist that state before the placeholder is added,
+  // so "Thinking..." is not written to storage by this step.
+  chatHistory.splice(idx,1);
+  saveChatToStorage();
+  var placeholder={role:'assistant',content:'Thinking...'};
+  chatHistory.splice(idx,0,placeholder);
+  renderMessages();
+  // Swap the placeholder for the final content. It is located by identity because
+  // chatHistory may have changed while the request was in flight.
+  var settle=function(content){
+    var at=chatHistory.indexOf(placeholder);
+    if(at===-1)return;
+    chatHistory[at]={role:'assistant',content:content};
+    saveChatToStorage();renderMessages();
+  };
+  // History is everything before the retried reply (it ends with the user message being re-sent).
+  apiPost('/api/chat',{message:userMsg.content,history:chatHistory.slice(0,idx)}).then(function(r){
+    if(r&&r.response){settle(r.response)}
+    else if(r&&r.error){settle('Error: '+r.error)}
+    else{settle('Error: no response from server')}
+  },function(e){
+    // Without a rejection handler the placeholder would stay on screen indefinitely.
+    settle('Error: '+(e&&e.message?e.message:'request failed'));
+  });
+}
+// Load the user message at idx back into the chat input for editing, then remove
+// that message and everything after it from chatHistory.
+function editMessage(idx){
+  var m=chatHistory[idx];
+  // Guard against a stale or out-of-range index, as copyMessage does.
+  if(!m||m.role!=='user')return;
+  var inp=document.getElementById('chatInput');if(!inp)return;
+  // Fall back to '' so a message without content does not put "undefined" in the input.
+  inp.value=m.content||'';
+  inp.focus();
+  // Remove this message and all subsequent messages
+  chatHistory.splice(idx);
+  saveChatToStorage();renderMessages();
+}
 function sendChat(){
   var inp=document.getElementById('chatInput');if(!inp)return;
   var msg=inp.value.trim();if(!msg)return;