nothumanallowed 9.8.1 → 9.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nothumanallowed",
3
- "version": "9.8.1",
3
+ "version": "9.8.3",
4
4
  "description": "NotHumanAllowed — 38 AI agents, 53 tools. Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, GitHub, Notion, Slack, voice chat, 28 languages. Zero-dependency CLI.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -718,12 +718,31 @@ export async function cmdChat(args) {
718
718
  const result = await executeTool(action, params, config);
719
719
  process.stdout.write('\r' + ' '.repeat(80) + '\r');
720
720
 
721
- // Show action-specific result header
722
- const resultHeader = formatToolResult(action, params, result);
723
- console.log(` ${resultHeader}`);
724
- console.log(` ${result.split('\n').join('\n ')}\n`);
725
-
726
- addMessages(conv, input, response + `\n\n[Tool ${action} executed. Result: ${result}]`);
721
// Handle the tool result. A structured result flagged with __screenshot is
// routed to the vision model; anything else is printed as text.
if (result && typeof result === 'object' && result.__screenshot) {
  console.log(` ${G}Screenshot captured${NC} — analyzing with vision...\n`);
  try {
    const { callLLMVision } = await import('../services/llm.mjs');
    const visionResponse = await callLLMVision(config,
      'Describe EXACTLY and ONLY what you see in this screenshot. NEVER invent or fabricate details.',
      `The user said: "${input}"\n\n${result.question}`,
      { base64: result.base64, mimeType: 'image/png' }
    );
    console.log(` ${visionResponse.split('\n').join('\n ')}\n`);
    addMessages(conv, input, response + `\n\n[Screenshot: ${result.path}]\n${visionResponse}`);
  } catch (visionErr) {
    // Keep the turn in the conversation even when vision analysis fails.
    console.log(` ${R}Vision failed: ${visionErr.message}${NC}\n`);
    addMessages(conv, input, response + `\n\n[Screenshot captured but vision failed: ${visionErr.message}]`);
  }
} else {
  // Show action-specific result header.
  // Tool results are not guaranteed to be strings. JSON.stringify returns
  // undefined (not a string) for undefined or function values, which would
  // make the .split() below throw, so fall back to String() in that case.
  const resultStr = typeof result === 'string' ? result : (JSON.stringify(result) ?? String(result));
  const resultHeader = formatToolResult(action, params, resultStr);
  console.log(` ${resultHeader}`);
  console.log(` ${resultStr.split('\n').join('\n ')}\n`);

  addMessages(conv, input, response + `\n\n[Tool ${action} executed. Result: ${resultStr}]`);
}
727
746
  } catch (err) {
728
747
  process.stdout.write('\r' + ' '.repeat(80) + '\r');
729
748
  console.log(` ${R}Error executing ${action}: ${err.message}${NC}\n`);
@@ -1053,18 +1053,49 @@ export async function cmdUI(args) {
1053
1053
 
1054
1054
  // Execute ALL tool actions and collect results
1055
1055
  const toolResults = [];
1056
+ let screenshotData = null; // For vision: { base64, path, question }
1056
1057
  for (const { action, params } of actions) {
1057
1058
  try {
1058
1059
  const result = await executeTool(action, params, config);
1059
- toolResults.push({ action, result: typeof result === 'object' ? JSON.stringify(result) : String(result) });
1060
+ // Intercept structured screenshot result for vision flow
1061
+ if (result && typeof result === 'object' && result.__screenshot) {
1062
+ screenshotData = result;
1063
+ toolResults.push({ action, result: 'Screenshot captured. Analyzing with vision...' });
1064
+ } else {
1065
+ toolResults.push({ action, result: typeof result === 'object' ? JSON.stringify(result) : String(result) });
1066
+ }
1060
1067
  } catch (e) {
1061
1068
  toolResults.push({ action, result: `Error: ${e.message}` });
1062
1069
  }
1063
1070
  }
1064
1071
 
1065
1072
  let fullResponse;
1066
- if (toolResults.length > 0) {
1067
- // Second LLM call with real tool results — forces the LLM to use actual data
1073
+ if (screenshotData && screenshotData.base64) {
1074
+ // VISION FLOW: send screenshot to LLM as image via callLLMVision
1075
+ try {
1076
+ const { callLLMVision } = await import('../services/llm.mjs');
1077
+ const visionPrompt = enrichedSystemPrompt + '\n\nIMPORTANT: You are looking at a REAL screenshot from the user\'s screen. Describe ONLY what you ACTUALLY see. NEVER invent, guess, or fabricate details. If something is unclear, say so.';
1078
+ const question = `The user said: "${body.message}"\n\n${screenshotData.question}\n\nDescribe ONLY what you see. NEVER make up information.`;
1079
+ fullResponse = await callLLMVision(config, visionPrompt, question, {
1080
+ base64: screenshotData.base64,
1081
+ mimeType: 'image/png',
1082
+ });
1083
+ } catch (visionErr) {
1084
+ fullResponse = `I captured a screenshot but vision analysis failed: ${visionErr.message}. To use screen analysis, configure a vision-capable provider (Claude, GPT-4, Gemini).`;
1085
+ }
1086
+ // Prepend screenshot file marker for the UI to display inline
1087
+ if (screenshotData.path) {
1088
+ const fname = screenshotData.path.split('/').pop();
1089
+ // Copy to NHA screenshots dir for persistence
1090
+ try {
1091
+ const ssDir = path.join(NHA_DIR, 'screenshots');
1092
+ fs.mkdirSync(ssDir, { recursive: true });
1093
+ fs.copyFileSync(screenshotData.path, path.join(ssDir, fname));
1094
+ } catch {}
1095
+ fullResponse = `![Screenshot](/api/screenshots/${fname})\n\n${fullResponse}`;
1096
+ }
1097
+ } else if (toolResults.length > 0) {
1098
+ // Standard tool results flow
1068
1099
  const toolContext = toolResults.map(t => {
1069
1100
  let clean = t.result.replace(/\[Screenshot[^\]]*\]/g, '').replace(/!\[.*?\]\(data:image[^)]+\)/g, '').slice(0, 3000);
1070
1101
  return `[${t.action} result]: ${clean.trim()}`;
@@ -1073,7 +1104,6 @@ export async function cmdUI(args) {
1073
1104
  try {
1074
1105
  fullResponse = await callLLM(config, enrichedSystemPrompt, followUp);
1075
1106
  } catch {
1076
- // Fallback: show raw results
1077
1107
  fullResponse = toolResults.map(t => `${t.action}: ${t.result}`).join('\n\n');
1078
1108
  }
1079
1109
  } else {
@@ -1325,6 +1355,40 @@ export async function cmdUI(args) {
1325
1355
  }
1326
1356
 
1327
1357
  const result = await executeTool(action, params, config);
1358
+
1359
+ // Intercept screen capture vision result
1360
+ if (result && typeof result === 'object' && result.__screenshot) {
1361
+ sendSSE('tool', { action, status: 'analyzing_screen' });
1362
+ try {
1363
+ const { callLLMVision } = await import('../services/llm.mjs');
1364
+ const visionPrompt = enrichedPrompt + '\\n\\nIMPORTANT: You are looking at a REAL screenshot. Describe ONLY what you ACTUALLY see. NEVER invent or fabricate.';
1365
+ const question = `The user said: "${msg}"\\n\\n${result.question}`;
1366
+ const visionResponse = await callLLMVision(config, visionPrompt, question, { base64: result.base64, mimeType: 'image/png' });
1367
+
1368
+ // Save screenshot for display
1369
+ const fname = result.path.split('/').pop();
1370
+ const ssDir = path.join(NHA_DIR, 'screenshots');
1371
+ fs.mkdirSync(ssDir, { recursive: true });
1372
+ try { fs.copyFileSync(result.path, path.join(ssDir, fname)); } catch {}
1373
+
1374
+ // Send screenshot to client
1375
+ sendSSE('screenshot', { filename: fname, format: 'png' });
1376
+ if (!res._screenshotFiles) res._screenshotFiles = [];
1377
+ res._screenshotFiles.push(fname);
1378
+
1379
+ // Send vision analysis as tokens
1380
+ sendSSE('tool_synthesis', {});
1381
+ sendSSE('token', { content: visionResponse });
1382
+
1383
+ toolResults.push({ action, result: `[Screen analyzed] ${visionResponse.slice(0, 500)}` });
1384
+ sendSSE('tool', { action, status: 'done', result: 'Screen analyzed' });
1385
+ } catch (visionErr) {
1386
+ toolResults.push({ action, result: `Vision failed: ${visionErr.message}` });
1387
+ sendSSE('tool', { action, status: 'error', error: visionErr.message });
1388
+ }
1389
+ continue;
1390
+ }
1391
+
1328
1392
  const resultStr = typeof result === 'object' ? JSON.stringify(result) : String(result);
1329
1393
  toolResults.push({ action, result: resultStr });
1330
1394
  sendSSE('tool', { action, status: 'done', result: typeof resultStr === 'string' ? resultStr.slice(0, 500) : '' });
package/src/constants.mjs CHANGED
@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
5
5
  const __filename = fileURLToPath(import.meta.url);
6
6
  const __dirname = path.dirname(__filename);
7
7
 
8
- export const VERSION = '9.8.1';
8
+ export const VERSION = '9.8.3';
9
9
  export const BASE_URL = 'https://nothumanallowed.com/cli';
10
10
  export const API_BASE = 'https://nothumanallowed.com/api/v1';
11
11
 
@@ -275,6 +275,96 @@ export async function callLLM(config, systemPrompt, userMessage, opts = {}) {
275
275
  return callFn(apiKey, model, systemPrompt, userMessage, false);
276
276
  }
277
277
 
278
/**
 * Call an LLM with a multimodal (vision) message: one media attachment plus a text question.
 * Unified function for ALL vision calls (web UI, CLI, screen capture); dispatches on
 * config.llm.provider.
 *
 * Provider handling:
 *  - anthropic: Messages API; PDFs are sent as a document block, other media as an image block.
 *  - openai / deepseek / grok / mistral: OpenAI-style chat completions with an image_url data URL.
 *  - gemini: generateContent with inline_data; the key is sent in a header, not the URL.
 *
 * @param {object} config — reads config.llm.provider (default 'anthropic') and config.llm.model
 * @param {string} systemPrompt
 * @param {string} userMessage — text question about the image
 * @param {object} media — { base64, mimeType } (image/png, image/jpeg, application/pdf)
 * @returns {Promise<string>} text of the reply, or '' when the reply carries no text
 * @throws {Error} when the API key or media fields are missing, when no model is configured
 *   for a provider without a built-in default, on a non-2xx response, or for an unsupported provider
 */
export async function callLLMVision(config, systemPrompt, userMessage, media) {
  const provider = config.llm.provider || 'anthropic';
  const model = config.llm.model || null;
  const apiKey = getApiKey(config, provider);
  if (!apiKey) throw new Error(`No API key for ${provider}. Vision requires Claude, GPT-4, or Gemini.`);

  // Tolerate a missing media argument so the caller gets the validation error below
  // rather than a destructuring TypeError.
  const { base64, mimeType } = media ?? {};
  if (!base64 || !mimeType) throw new Error('media.base64 and media.mimeType are required');

  // Join every text-bearing part of a reply; the first part is not guaranteed to be the text one.
  const joinText = (parts) => (Array.isArray(parts) ? parts : [])
    .filter((part) => typeof part?.text === 'string')
    .map((part) => part.text)
    .join('');

  if (provider === 'anthropic') {
    const isPdf = mimeType === 'application/pdf';
    const contentBlock = {
      type: isPdf ? 'document' : 'image',
      source: { type: 'base64', media_type: mimeType, data: base64 },
    };

    const body = {
      model: model || 'claude-sonnet-4-20250514',
      max_tokens: 4096,
      system: systemPrompt,
      messages: [{ role: 'user', content: [contentBlock, { type: 'text', text: userMessage }] }],
    };
    const res = await fetch('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
      body: JSON.stringify(body),
    });
    if (!res.ok) throw new Error(`Anthropic vision ${res.status}: ${(await res.text()).slice(0, 300)}`);
    const data = await res.json();
    return joinText(data.content);
  }

  if (provider === 'openai' || provider === 'deepseek' || provider === 'grok' || provider === 'mistral') {
    const endpoints = {
      openai: 'https://api.openai.com/v1/chat/completions',
      deepseek: 'https://api.deepseek.com/v1/chat/completions',
      grok: 'https://api.x.ai/v1/chat/completions',
      mistral: 'https://api.mistral.ai/v1/chat/completions',
    };

    // Only OpenAI has a built-in default. Fail early for the others instead of
    // sending "model": null and surfacing an opaque HTTP error.
    const chatModel = model || (provider === 'openai' ? 'gpt-4o' : null);
    if (!chatModel) {
      throw new Error(`No model configured for ${provider}. Set a vision-capable model to use vision with this provider.`);
    }

    const body = {
      model: chatModel,
      max_tokens: 4096,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: [
          { type: 'image_url', image_url: { url: `data:${mimeType};base64,${base64}` } },
          { type: 'text', text: userMessage },
        ] },
      ],
    };
    const res = await fetch(endpoints[provider], {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
      body: JSON.stringify(body),
    });
    if (!res.ok) throw new Error(`${provider} vision ${res.status}: ${(await res.text()).slice(0, 300)}`);
    const data = await res.json();
    return data.choices?.[0]?.message?.content || '';
  }

  if (provider === 'gemini') {
    const geminiModel = model || 'gemini-2.0-flash';
    const body = {
      contents: [{ parts: [
        { inline_data: { mime_type: mimeType, data: base64 } },
        { text: userMessage },
      ] }],
      systemInstruction: { parts: [{ text: systemPrompt }] },
      generationConfig: { maxOutputTokens: 4096 },
    };
    // The key travels in the x-goog-api-key header so it does not end up in
    // URLs that proxies, logs, or error messages may record.
    const res = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${geminiModel}:generateContent`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'x-goog-api-key': apiKey },
      body: JSON.stringify(body),
    });
    if (!res.ok) throw new Error(`Gemini vision ${res.status}: ${(await res.text()).slice(0, 300)}`);
    const data = await res.json();
    return joinText(data.candidates?.[0]?.content?.parts);
  }

  throw new Error(`Vision not supported for provider: ${provider}. Use Claude, GPT-4, or Gemini.`);
}
367
+
278
368
  /**
279
369
  * Call an LLM provider with streaming enabled.
280
370
  * Calls onToken(chunk) for each token, returns full text at the end.
@@ -94,7 +94,18 @@ input:focus,textarea:focus{border-color:var(--green3)}
94
94
  .msg{margin-bottom:12px}
95
95
  .msg--user .msg__bubble{background:var(--bg3);border:1px solid var(--border2);border-radius:8px 8px 2px 8px;padding:10px 14px;max-width:85%;margin-left:auto;color:var(--bright)}
96
96
  .msg--assistant .msg__bubble{background:var(--greendim);border:1px solid var(--green3);border-radius:8px 8px 8px 2px;padding:10px 14px;max-width:85%;color:var(--text);white-space:pre-wrap;word-wrap:break-word}
97
+ .msg--assistant .msg__bubble img{max-width:100%;border-radius:8px;margin:8px 0;border:1px solid rgba(0,255,65,0.2)}
97
98
  .msg__label{font-size:10px;color:var(--dim);margin-bottom:2px}
99
+ .msg__actions{display:flex;gap:6px;margin-top:4px;opacity:0.15;transition:opacity 0.2s}
100
+ .msg:hover .msg__actions{opacity:1}
101
+ .msg__actions button{background:none;border:none;color:var(--dim);cursor:pointer;font-size:10px;font-family:var(--mono);padding:2px 4px}
102
+ .msg__actions button:hover{color:var(--green)}
103
+ #canvasPanel{position:fixed;top:60px;right:12px;width:480px;max-height:calc(100vh - 80px);background:#0d0d0d;border:1px solid var(--green);border-radius:12px;z-index:1000;overflow:hidden;display:none;flex-direction:column;box-shadow:0 0 30px rgba(0,255,65,0.1)}
104
+ #canvasPanel.open{display:flex}
105
+ #canvasPanel .cvs-header{display:flex;align-items:center;justify-content:space-between;padding:8px 12px;border-bottom:1px solid var(--green);background:rgba(0,255,65,0.05)}
106
+ #canvasPanel .cvs-header span{font-family:var(--mono);color:var(--green);font-size:12px}
107
+ #canvasPanel .cvs-header button{background:none;border:none;color:var(--dim);cursor:pointer;font-size:14px;margin-left:8px}
108
+ #canvasPanel iframe{flex:1;border:none;background:#fff;min-height:350px;width:100%}
98
109
  .msg--thinking{color:var(--dim);font-style:italic}
99
110
  .tool-indicator{display:inline-block;padding:2px 8px;margin:2px 0;border-radius:4px;font-size:11px;background:var(--bg3);border:1px solid var(--border)}
100
111
  .tool-indicator--browser{border-color:#9c27b0;color:#ce93d8}
@@ -433,17 +444,32 @@ function renderMessages(){
433
444
  el.innerHTML='<div class="chat__empty"><div class="chat__empty-title">NHA Chat</div><div>Personal Operations Assistant — Streaming + Web Search + Browser</div><div class="chat__empty-hint">Try: Show my unread emails / Search the web for React 19 / Open google.com and take a screenshot</div></div>';
434
445
  return;
435
446
  }
436
- var h='';chatHistory.forEach(function(m){
447
+ var h='';chatHistory.forEach(function(m,mi){
437
448
  var raw=m.content||'';
438
- // Strip any raw base64 data that leaked into content (from LLM hallucinations)
449
+ var isA=m.role==='assistant';
450
+ // Strip any raw base64 data that leaked into content
439
451
  raw=raw.replace(/data:image\\/[a-z]+;base64,[A-Za-z0-9+\\/=]{200,}/g,'[image]');
440
452
  raw=raw.replace(/[A-Za-z0-9+\\/=]{500,}/g,'');
453
+ // Handle canvas markers (assistant only)
454
+ if(isA){
455
+ var cm=raw.match(/\\[CANVAS_RENDER\\]([\\s\\S]*?)\\[\\/CANVAS_RENDER\\]/);
456
+ if(cm){try{var cd=JSON.parse(cm[1]);showCanvas(cd.html,cd.title);}catch(e){} raw=raw.replace(/\\[CANVAS_RENDER\\][\\s\\S]*?\\[\\/CANVAS_RENDER\\]/,'').trim();}
457
+ if(raw.indexOf('[CANVAS_CLEAR]')!==-1){closeCanvas();raw=raw.replace(/\\[CANVAS_CLEAR\\][\\s\\S]*?\\[\\/CANVAS_CLEAR\\]/,'').trim();}
458
+ // Handle screenshot file markers
459
+ var sm=raw.match(/\\[SCREENSHOT_FILE\\](.*?)\\[\\/SCREENSHOT_FILE\\]/);
460
+ if(sm){var fn=sm[1].split('/').pop();raw=raw.replace(/\\[SCREENSHOT_FILE\\].*?\\[\\/SCREENSHOT_FILE\\]/,'');raw='![Screenshot](/api/screenshots/'+fn+')\\n'+raw;}
461
+ }
441
462
  var imgs=[];var idx=0;
442
- // Match both /api/screenshots/ URLs and data:image (short ones only, for inline display)
443
463
  var safe=raw.replace(/!\\[([^\\]]*)\\]\\((\\/api\\/screenshots\\/[a-zA-Z0-9._-]+)\\)/g,function(_,alt,src){var ph='__IMG'+idx+'__';imgs.push({ph:ph,alt:alt,src:src});idx++;return ph;});
444
464
  var content=esc(safe);
445
465
  for(var i=0;i<imgs.length;i++){content=content.replace(imgs[i].ph,'<img class="screenshot-preview" alt="'+esc(imgs[i].alt)+'" src="'+imgs[i].src+'">');}
446
- h+='<div class="msg msg--'+esc(m.role)+'"><div class="msg__label">'+esc(m.role==='user'?'You':'NHA')+'</div><div class="msg__bubble">'+content+'</div></div>';
466
+ // Action buttons
467
+ var acts='<div class="msg__actions">';
468
+ acts+='<button onclick="copyMsg('+mi+')">Copy</button>';
469
+ if(isA){acts+='<button onclick="retryMsg('+mi+')">Retry</button>';}
470
+ else{acts+='<button onclick="editMsg('+mi+')">Edit</button>';}
471
+ acts+='</div>';
472
+ h+='<div class="msg msg--'+esc(m.role)+'"><div class="msg__label">'+esc(m.role==='user'?'You':'NHA')+'</div><div class="msg__bubble">'+content+'</div>'+acts+'</div>';
447
473
  });
448
474
  el.innerHTML=h;el.scrollTop=el.scrollHeight;
449
475
  }
@@ -496,6 +522,42 @@ function clearChatAttach(){
496
522
  document.getElementById('chatImageInput').value='';
497
523
  }
498
524
 
525
+ // ---- CANVAS ----
526
// Open the canvas side panel and write the given HTML into its iframe.
//   html  - markup written into the iframe document
//   title - panel caption; 'Canvas' is used when it is empty
// Does nothing when the panel or the iframe element is missing.
// NOTE(review): html is written as-is into a frame this page can script;
// confirm the content source is trusted.
function showCanvas(html,title){
  var panel=document.getElementById('canvasPanel');
  var caption=document.getElementById('canvasTitle');
  var frame=document.getElementById('canvasFrame');
  if(!(panel&&frame)){return;}
  if(caption){caption.textContent=title?title:'Canvas';}
  panel.classList.add('open');
  var frameDoc=frame.contentDocument?frame.contentDocument:frame.contentWindow.document;
  frameDoc.open();
  frameDoc.write(html);
  frameDoc.close();
}
536
// Hide the canvas side panel by dropping its 'open' class; no-op when the panel is absent.
function closeCanvas(){
  var panel=document.getElementById('canvasPanel');
  if(!panel){return;}
  panel.classList.remove('open');
}
537
// Switch the canvas panel between its stylesheet size and an enlarged
// 80vw x 80vh box offset 10vh from the top and 10vw from the right.
// An inline width of '80vw' marks the enlarged state; clearing the inline
// values hands control back to the stylesheet.
function toggleCanvasSize(){
  var panel=document.getElementById('canvasPanel');
  if(!panel){return;}
  var css=panel.style;
  var enlarged=css.width==='80vw';
  css.width=enlarged?'':'80vw';
  css.height=enlarged?'':'80vh';
  css.top=enlarged?'':'10vh';
  css.right=enlarged?'':'10vw';
}
542
+ // ---- MSG ACTIONS ----
543
// Copy message i of chatHistory to the clipboard as plain text, with the
// canvas-render and screenshot-file markers stripped.
// Does nothing when the index has no message or the clipboard is unavailable.
function copyMsg(i){
  var m=chatHistory[i];if(!m)return;
  var t=(m.content||'').replace(/\\[CANVAS_RENDER\\][\\s\\S]*?\\[\\/CANVAS_RENDER\\]/g,'').replace(/\\[SCREENSHOT_FILE\\].*?\\[\\/SCREENSHOT_FILE\\]/g,'').trim();
  // navigator.clipboard only exists in secure contexts; without this guard a
  // page served over plain http on a non-localhost host throws on click.
  var cb=navigator.clipboard;
  if(!cb||typeof cb.writeText!=='function')return;
  cb.writeText(t).catch(function(){});
}
548
// Retry the assistant message at index i: drop it together with the user
// message that prompted it, put that user text back into the chat input and
// send it again.
// Only acts when i points at an assistant message directly preceded by a
// user message.
function retryMsg(i){
  var target=chatHistory[i];
  // Guard the lookup: a stale or out-of-range index must not throw.
  if(i<1||!target||target.role!=='assistant')return;
  var userMsg=chatHistory[i-1];if(!userMsg||userMsg.role!=='user')return;
  // Remove the user message too, as editMsg does before a re-send; leaving it
  // would show the same prompt twice once the input is sent again.
  // NOTE(review): assumes sendChat appends the input as a new user message - confirm.
  chatHistory.splice(i-1,2);saveChatToStorage();renderMessages();
  var inp=document.getElementById('chatInput');if(inp){inp.value=userMsg.content;}
  sendChat();
}
555
// Edit the user message at index i: load its text into the chat input, focus
// the input, and truncate chatHistory from i onward so the edited text can be
// sent as a fresh message.
// Only acts on user messages, and only when the chat input exists.
function editMsg(i){
  var m=chatHistory[i];
  // Guard the lookup (same as copyMsg): a stale index must not throw.
  if(!m||m.role!=='user')return;
  var inp=document.getElementById('chatInput');if(!inp)return;
  inp.value=m.content;inp.focus();
  chatHistory.splice(i);saveChatToStorage();renderMessages();
}
499
561
  function sendChat(){
500
562
  var inp=document.getElementById('chatInput');if(!inp)return;
501
563
  var msg=inp.value.trim();
@@ -1956,6 +2018,7 @@ init();
1956
2018
  </div>
1957
2019
  </div>
1958
2020
 
2021
+ <div id="canvasPanel"><div class="cvs-header"><span id="canvasTitle">Canvas</span><div><button onclick="toggleCanvasSize()" title="Resize">&#x2922;</button><button onclick="closeCanvas()" title="Close">&times;</button></div></div><iframe id="canvasFrame" sandbox="allow-scripts allow-same-origin"></iframe></div>
1959
2022
  <script>${JS}</script>
1960
2023
  </body>
1961
2024
  </html>`;