nothumanallowed 9.8.1 → 9.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/chat.mjs +25 -6
- package/src/commands/ui.mjs +68 -4
- package/src/constants.mjs +1 -1
- package/src/services/llm.mjs +90 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nothumanallowed",
|
|
3
|
-
"version": "9.8.1",
|
|
3
|
+
"version": "9.8.2",
|
|
4
4
|
"description": "NotHumanAllowed — 38 AI agents, 53 tools. Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, GitHub, Notion, Slack, voice chat, 28 languages. Zero-dependency CLI.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
package/src/commands/chat.mjs
CHANGED
|
@@ -718,12 +718,31 @@ export async function cmdChat(args) {
|
|
|
718
718
|
const result = await executeTool(action, params, config);
|
|
719
719
|
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
720
720
|
|
|
721
|
-
//
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
721
|
+
// Handle screen capture vision result
|
|
722
|
+
if (result && typeof result === 'object' && result.__screenshot) {
|
|
723
|
+
console.log(` ${G}Screenshot captured${NC} — analyzing with vision...\n`);
|
|
724
|
+
try {
|
|
725
|
+
const { callLLMVision } = await import('../services/llm.mjs');
|
|
726
|
+
const visionResponse = await callLLMVision(config,
|
|
727
|
+
'Describe EXACTLY and ONLY what you see in this screenshot. NEVER invent or fabricate details.',
|
|
728
|
+
`The user said: "${input}"\n\n${result.question}`,
|
|
729
|
+
{ base64: result.base64, mimeType: 'image/png' }
|
|
730
|
+
);
|
|
731
|
+
console.log(` ${visionResponse.split('\n').join('\n ')}\n`);
|
|
732
|
+
addMessages(conv, input, response + `\n\n[Screenshot: ${result.path}]\n${visionResponse}`);
|
|
733
|
+
} catch (visionErr) {
|
|
734
|
+
console.log(` ${R}Vision failed: ${visionErr.message}${NC}\n`);
|
|
735
|
+
addMessages(conv, input, response + `\n\n[Screenshot captured but vision failed: ${visionErr.message}]`);
|
|
736
|
+
}
|
|
737
|
+
} else {
|
|
738
|
+
// Show action-specific result header
|
|
739
|
+
const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
|
|
740
|
+
const resultHeader = formatToolResult(action, params, resultStr);
|
|
741
|
+
console.log(` ${resultHeader}`);
|
|
742
|
+
console.log(` ${resultStr.split('\n').join('\n ')}\n`);
|
|
743
|
+
|
|
744
|
+
addMessages(conv, input, response + `\n\n[Tool ${action} executed. Result: ${resultStr}]`);
|
|
745
|
+
}
|
|
727
746
|
} catch (err) {
|
|
728
747
|
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
729
748
|
console.log(` ${R}Error executing ${action}: ${err.message}${NC}\n`);
|
package/src/commands/ui.mjs
CHANGED
|
@@ -1053,18 +1053,49 @@ export async function cmdUI(args) {
|
|
|
1053
1053
|
|
|
1054
1054
|
// Execute ALL tool actions and collect results
|
|
1055
1055
|
const toolResults = [];
|
|
1056
|
+
let screenshotData = null; // For vision: { base64, path, question }
|
|
1056
1057
|
for (const { action, params } of actions) {
|
|
1057
1058
|
try {
|
|
1058
1059
|
const result = await executeTool(action, params, config);
|
|
1059
|
-
|
|
1060
|
+
// Intercept structured screenshot result for vision flow
|
|
1061
|
+
if (result && typeof result === 'object' && result.__screenshot) {
|
|
1062
|
+
screenshotData = result;
|
|
1063
|
+
toolResults.push({ action, result: 'Screenshot captured. Analyzing with vision...' });
|
|
1064
|
+
} else {
|
|
1065
|
+
toolResults.push({ action, result: typeof result === 'object' ? JSON.stringify(result) : String(result) });
|
|
1066
|
+
}
|
|
1060
1067
|
} catch (e) {
|
|
1061
1068
|
toolResults.push({ action, result: `Error: ${e.message}` });
|
|
1062
1069
|
}
|
|
1063
1070
|
}
|
|
1064
1071
|
|
|
1065
1072
|
let fullResponse;
|
|
1066
|
-
if (
|
|
1067
|
-
//
|
|
1073
|
+
if (screenshotData && screenshotData.base64) {
|
|
1074
|
+
// VISION FLOW: send screenshot to LLM as image via callLLMVision
|
|
1075
|
+
try {
|
|
1076
|
+
const { callLLMVision } = await import('../services/llm.mjs');
|
|
1077
|
+
const visionPrompt = enrichedSystemPrompt + '\n\nIMPORTANT: You are looking at a REAL screenshot from the user\'s screen. Describe ONLY what you ACTUALLY see. NEVER invent, guess, or fabricate details. If something is unclear, say so.';
|
|
1078
|
+
const question = `The user said: "${body.message}"\n\n${screenshotData.question}\n\nDescribe ONLY what you see. NEVER make up information.`;
|
|
1079
|
+
fullResponse = await callLLMVision(config, visionPrompt, question, {
|
|
1080
|
+
base64: screenshotData.base64,
|
|
1081
|
+
mimeType: 'image/png',
|
|
1082
|
+
});
|
|
1083
|
+
} catch (visionErr) {
|
|
1084
|
+
fullResponse = `I captured a screenshot but vision analysis failed: ${visionErr.message}. To use screen analysis, configure a vision-capable provider (Claude, GPT-4, Gemini).`;
|
|
1085
|
+
}
|
|
1086
|
+
// Prepend screenshot file marker for the UI to display inline
|
|
1087
|
+
if (screenshotData.path) {
|
|
1088
|
+
const fname = screenshotData.path.split('/').pop();
|
|
1089
|
+
// Copy to NHA screenshots dir for persistence
|
|
1090
|
+
try {
|
|
1091
|
+
const ssDir = path.join(NHA_DIR, 'screenshots');
|
|
1092
|
+
fs.mkdirSync(ssDir, { recursive: true });
|
|
1093
|
+
fs.copyFileSync(screenshotData.path, path.join(ssDir, fname));
|
|
1094
|
+
} catch {}
|
|
1095
|
+
fullResponse = `\n\n${fullResponse}`;
|
|
1096
|
+
}
|
|
1097
|
+
} else if (toolResults.length > 0) {
|
|
1098
|
+
// Standard tool results flow
|
|
1068
1099
|
const toolContext = toolResults.map(t => {
|
|
1069
1100
|
let clean = t.result.replace(/\[Screenshot[^\]]*\]/g, '').replace(/!\[.*?\]\(data:image[^)]+\)/g, '').slice(0, 3000);
|
|
1070
1101
|
return `[${t.action} result]: ${clean.trim()}`;
|
|
@@ -1073,7 +1104,6 @@ export async function cmdUI(args) {
|
|
|
1073
1104
|
try {
|
|
1074
1105
|
fullResponse = await callLLM(config, enrichedSystemPrompt, followUp);
|
|
1075
1106
|
} catch {
|
|
1076
|
-
// Fallback: show raw results
|
|
1077
1107
|
fullResponse = toolResults.map(t => `${t.action}: ${t.result}`).join('\n\n');
|
|
1078
1108
|
}
|
|
1079
1109
|
} else {
|
|
@@ -1325,6 +1355,40 @@ export async function cmdUI(args) {
|
|
|
1325
1355
|
}
|
|
1326
1356
|
|
|
1327
1357
|
const result = await executeTool(action, params, config);
|
|
1358
|
+
|
|
1359
|
+
// Intercept screen capture vision result
|
|
1360
|
+
if (result && typeof result === 'object' && result.__screenshot) {
|
|
1361
|
+
sendSSE('tool', { action, status: 'analyzing_screen' });
|
|
1362
|
+
try {
|
|
1363
|
+
const { callLLMVision } = await import('../services/llm.mjs');
|
|
1364
|
+
const visionPrompt = enrichedPrompt + '\\n\\nIMPORTANT: You are looking at a REAL screenshot. Describe ONLY what you ACTUALLY see. NEVER invent or fabricate.';
|
|
1365
|
+
const question = `The user said: "${msg}"\\n\\n${result.question}`;
|
|
1366
|
+
const visionResponse = await callLLMVision(config, visionPrompt, question, { base64: result.base64, mimeType: 'image/png' });
|
|
1367
|
+
|
|
1368
|
+
// Save screenshot for display
|
|
1369
|
+
const fname = result.path.split('/').pop();
|
|
1370
|
+
const ssDir = path.join(NHA_DIR, 'screenshots');
|
|
1371
|
+
fs.mkdirSync(ssDir, { recursive: true });
|
|
1372
|
+
try { fs.copyFileSync(result.path, path.join(ssDir, fname)); } catch {}
|
|
1373
|
+
|
|
1374
|
+
// Send screenshot to client
|
|
1375
|
+
sendSSE('screenshot', { filename: fname, format: 'png' });
|
|
1376
|
+
if (!res._screenshotFiles) res._screenshotFiles = [];
|
|
1377
|
+
res._screenshotFiles.push(fname);
|
|
1378
|
+
|
|
1379
|
+
// Send vision analysis as tokens
|
|
1380
|
+
sendSSE('tool_synthesis', {});
|
|
1381
|
+
sendSSE('token', { content: visionResponse });
|
|
1382
|
+
|
|
1383
|
+
toolResults.push({ action, result: `[Screen analyzed] ${visionResponse.slice(0, 500)}` });
|
|
1384
|
+
sendSSE('tool', { action, status: 'done', result: 'Screen analyzed' });
|
|
1385
|
+
} catch (visionErr) {
|
|
1386
|
+
toolResults.push({ action, result: `Vision failed: ${visionErr.message}` });
|
|
1387
|
+
sendSSE('tool', { action, status: 'error', error: visionErr.message });
|
|
1388
|
+
}
|
|
1389
|
+
continue;
|
|
1390
|
+
}
|
|
1391
|
+
|
|
1328
1392
|
const resultStr = typeof result === 'object' ? JSON.stringify(result) : String(result);
|
|
1329
1393
|
toolResults.push({ action, result: resultStr });
|
|
1330
1394
|
sendSSE('tool', { action, status: 'done', result: typeof resultStr === 'string' ? resultStr.slice(0, 500) : '' });
|
package/src/constants.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
|
|
|
5
5
|
const __filename = fileURLToPath(import.meta.url);
|
|
6
6
|
const __dirname = path.dirname(__filename);
|
|
7
7
|
|
|
8
|
-
export const VERSION = '9.8.1';
|
|
8
|
+
export const VERSION = '9.8.2';
|
|
9
9
|
export const BASE_URL = 'https://nothumanallowed.com/cli';
|
|
10
10
|
export const API_BASE = 'https://nothumanallowed.com/api/v1';
|
|
11
11
|
|
package/src/services/llm.mjs
CHANGED
|
@@ -275,6 +275,96 @@ export async function callLLM(config, systemPrompt, userMessage, opts = {}) {
|
|
|
275
275
|
return callFn(apiKey, model, systemPrompt, userMessage, false);
|
|
276
276
|
}
|
|
277
277
|
|
|
278
|
+
/**
 * Call the configured LLM provider with one piece of media plus a text question.
 * Unified function for ALL vision calls (web UI, CLI, screen capture).
 *
 * Provider routing:
 *   - 'anthropic': Messages API; PDFs are sent as a `document` block, everything
 *     else as an `image` block.
 *   - 'openai' | 'deepseek' | 'grok' | 'mistral': OpenAI-style chat completions
 *     with the media embedded as a `data:` URL in an `image_url` part.
 *     NOTE(review): a PDF is also sent as `image_url` on this path — confirm the
 *     target provider accepts that before relying on it.
 *   - 'gemini': generateContent with the media as `inline_data`.
 *
 * @param {object} config — `config.llm.provider` (defaults to 'anthropic') and
 *   `config.llm.model` are read here; the whole object is passed to getApiKey.
 * @param {string} systemPrompt
 * @param {string} userMessage — text question about the media
 * @param {object} media — { base64, mimeType } (image/png, image/jpeg, application/pdf)
 * @returns {Promise<string>} text of the reply, or '' when the reply has no text
 * @throws {Error} when no API key is available, when `media` is incomplete, when
 *   an OpenAI-compatible provider other than 'openai' has no model configured,
 *   when the provider is not supported, or when the HTTP response is not OK.
 */
export async function callLLMVision(config, systemPrompt, userMessage, media) {
  const provider = config?.llm?.provider || 'anthropic';
  const model = config?.llm?.model || null;
  const apiKey = getApiKey(config, provider);
  if (!apiKey) throw new Error(`No API key for ${provider}. Vision requires Claude, GPT-4, or Gemini.`);

  // Tolerate a missing `media` argument so the caller gets the validation error
  // below instead of a destructuring TypeError.
  const { base64, mimeType } = media ?? {};
  if (!base64 || !mimeType) throw new Error('media.base64 and media.mimeType are required');

  if (provider === 'anthropic') {
    const blockType = mimeType === 'application/pdf' ? 'document' : 'image';
    const contentBlock = { type: blockType, source: { type: 'base64', media_type: mimeType, data: base64 } };
    const data = await postVisionJson(
      'https://api.anthropic.com/v1/messages',
      { 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
      {
        model: model || 'claude-sonnet-4-20250514',
        max_tokens: 4096,
        system: systemPrompt,
        messages: [{ role: 'user', content: [contentBlock, { type: 'text', text: userMessage }] }],
      },
      'Anthropic',
    );
    // A reply may carry several content blocks; keep every text block, not just the first.
    return joinVisionText(data.content);
  }

  const openAiCompatibleUrls = new Map([
    ['openai', 'https://api.openai.com/v1/chat/completions'],
    ['deepseek', 'https://api.deepseek.com/v1/chat/completions'],
    ['grok', 'https://api.x.ai/v1/chat/completions'],
    ['mistral', 'https://api.mistral.ai/v1/chat/completions'],
  ]);
  if (openAiCompatibleUrls.has(provider)) {
    // Only OpenAI has a built-in default here. For the other providers a missing
    // model used to be sent as `model: null`; fail fast with a clear message instead.
    const chatModel = model || (provider === 'openai' ? 'gpt-4o' : null);
    if (!chatModel) {
      throw new Error(`No model configured for ${provider}. Set a vision-capable model to use vision.`);
    }
    const data = await postVisionJson(
      openAiCompatibleUrls.get(provider),
      { 'Authorization': `Bearer ${apiKey}` },
      {
        model: chatModel,
        max_tokens: 4096,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: [
            { type: 'image_url', image_url: { url: `data:${mimeType};base64,${base64}` } },
            { type: 'text', text: userMessage },
          ] },
        ],
      },
      provider,
    );
    return data.choices?.[0]?.message?.content || '';
  }

  if (provider === 'gemini') {
    const geminiModel = model || 'gemini-2.0-flash';
    // The key travels in the x-goog-api-key header rather than the query string,
    // so it does not end up in URLs that get logged or echoed in errors.
    const data = await postVisionJson(
      `https://generativelanguage.googleapis.com/v1beta/models/${geminiModel}:generateContent`,
      { 'x-goog-api-key': apiKey },
      {
        contents: [{ parts: [
          { inline_data: { mime_type: mimeType, data: base64 } },
          { text: userMessage },
        ] }],
        systemInstruction: { parts: [{ text: systemPrompt }] },
        generationConfig: { maxOutputTokens: 4096 },
      },
      'Gemini',
    );
    // A candidate may be split across several parts; concatenate all text parts.
    return joinVisionText(data.candidates?.[0]?.content?.parts);
  }

  throw new Error(`Vision not supported for provider: ${provider}. Use Claude, GPT-4, or Gemini.`);
}

/**
 * POST a JSON body and return the parsed JSON response.
 * Throws an Error prefixed with `label` that carries the HTTP status and the
 * first 300 characters of the response body when the response is not OK.
 */
async function postVisionJson(url, headers, body, label) {
  const res = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', ...headers },
    body: JSON.stringify(body),
  });
  if (!res.ok) throw new Error(`${label} vision ${res.status}: ${(await res.text()).slice(0, 300)}`);
  return res.json();
}

/**
 * Concatenate the string `text` fields of an array of response parts.
 * Returns '' when `parts` is not an array or holds no text.
 */
function joinVisionText(parts) {
  if (!Array.isArray(parts)) return '';
  return parts
    .map((part) => part?.text)
    .filter((text) => typeof text === 'string')
    .join('');
}
|
|
367
|
+
|
|
278
368
|
/**
|
|
279
369
|
* Call an LLM provider with streaming enabled.
|
|
280
370
|
* Calls onToken(chunk) for each token, returns full text at the end.
|