nothumanallowed 15.1.63 → 15.1.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nothumanallowed",
3
- "version": "15.1.63",
3
+ "version": "15.1.65",
4
4
  "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/constants.mjs CHANGED
@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
5
5
  const __filename = fileURLToPath(import.meta.url);
6
6
  const __dirname = path.dirname(__filename);
7
7
 
8
- export const VERSION = '15.1.63';
8
+ export const VERSION = '15.1.65';
9
9
  export const BASE_URL = 'https://nothumanallowed.com/cli';
10
10
  export const API_BASE = 'https://nothumanallowed.com/api/v1';
11
11
 
@@ -17,7 +17,7 @@ import {
17
17
  import { callLLMStream, callLLM, callLLMVision, parseAgentFile } from '../../services/llm.mjs';
18
18
  import { buildMemoryContext } from '../../services/memory.mjs';
19
19
  import { parseActions, executeTool, buildSystemPrompt, stripOrphanFences } from '../../services/tool-executor.mjs';
20
- import { detectLanguage } from '../../services/message-responder.mjs';
20
+ import { detectLanguage, tryDirectActionAll } from '../../services/message-responder.mjs';
21
21
 
22
22
  // Migrate on import (once)
23
23
  migrateOldHistory();
@@ -267,6 +267,32 @@ export function register(router) {
267
267
  }, 3000);
268
268
 
269
269
  try {
270
+ // ── Deterministic direct-action dispatcher (LLM-NLU + server execute) ──
271
+ // Same architecture used by Telegram/Discord. Before invoking the chat
272
+ // LLM, classify the message: if it maps to a state-changing tool
273
+ // (calendar/email/task/file/drive/slack/notion/github/...), execute it
274
+ // deterministically server-side and stream the result. No more "the
275
+ // model said done but didn't call the tool".
276
+ const direct = await tryDirectActionAll(msg, config, {
277
+ auditKey: `chat:${body.conversationId || 'anon'}`,
278
+ });
279
+ if (direct) {
280
+ if (heartbeatInterval) { clearInterval(heartbeatInterval); heartbeatInterval = null; }
281
+ sse('tool', { action: direct.action, status: 'done', result: (direct.message || '').slice(0, 240) });
282
+ sse('token', { content: direct.message });
283
+ // Persist to conversation
284
+ if (body.conversationId) {
285
+ try {
286
+ const conv = loadConversation(body.conversationId);
287
+ if (conv) addMessages(conv, msg, direct.message);
288
+ } catch {}
289
+ }
290
+ sse('done', { content: direct.message });
291
+ res.write('data: [DONE]\n\n');
292
+ res.end();
293
+ return;
294
+ }
295
+
270
296
  let fullResponse = '';
271
297
  fullResponse = await callLLMStream(config, enrichedPrompt, userMessage, (chunk) => {
272
298
  clearInterval(heartbeatInterval);
@@ -829,9 +829,63 @@ class TelegramResponder {
829
829
  // Track this user for broadcast notifications (update alerts, etc.)
830
830
  touchTelegramUser(chatId, message.from?.username, message.from?.first_name);
831
831
 
832
- let rawText = message.text || '';
832
+ let rawText = message.text || message.caption || '';
833
833
  let isVoice = false;
834
834
 
835
+ // ── Image / photo handler (vision via Liara or fallback provider) ──────
836
+ // Telegram sends `message.photo` as an array of size variants — we pick
837
+ // the largest. For documents (e.g. screenshots sent as files), we accept
838
+ // any mime starting with image/.
839
+ const photo = Array.isArray(message.photo) && message.photo.length
840
+ ? message.photo[message.photo.length - 1]
841
+ : null;
842
+ const isImageDoc = message.document && /^image\//.test(message.document.mime_type || '');
843
+ if (photo || isImageDoc) {
844
+ try {
845
+ await this._telegramCall('sendChatAction', { chat_id: chatId, action: 'typing' });
846
+ const fileId = photo ? photo.file_id : message.document.file_id;
847
+ const fileInfo = await this._telegramCall('getFile', { file_id: fileId });
848
+ const filePath = fileInfo?.result?.file_path;
849
+ if (!filePath) throw new Error('Telegram file_path missing');
850
+ const fileUrl = `https://api.telegram.org/file/bot${this.token}/${filePath}`;
851
+ const fileRes = await fetch(fileUrl);
852
+ if (!fileRes.ok) throw new Error(`Telegram file fetch ${fileRes.status}`);
853
+ const buf = Buffer.from(await fileRes.arrayBuffer());
854
+ const base64 = buf.toString('base64');
855
+ // Infer mediaType from file_path extension.
856
+ const ext = (filePath.split('.').pop() || 'jpg').toLowerCase();
857
+ const mediaType = ext === 'png' ? 'image/png'
858
+ : ext === 'gif' ? 'image/gif'
859
+ : ext === 'webp' ? 'image/webp'
860
+ : 'image/jpeg';
861
+ const userPrompt = rawText.trim()
862
+ || 'Describe this image in detail. If it contains text, transcribe it exactly. Reply in Italian.';
863
+ const langInstruction = detectLanguage(userPrompt) || (rawText ? null : null);
864
+ const sysPrompt = `You are a helpful visual assistant. ${langInstruction === 'English' ? 'Reply in English.' : 'Rispondi in italiano.'} Be specific and accurate. If asked to extract text, transcribe it verbatim. If asked to identify objects, list them clearly.`;
865
+ const { callLLMVision } = await import('./llm.mjs');
866
+ const description = await callLLMVision(this.config, sysPrompt, userPrompt, { base64, mediaType });
867
+ const truncated = description.length > 4000 ? description.slice(0, 3950) + '\n\n... [truncated]' : description;
868
+ // Audit
869
+ this._recordAudit(chatId, {
870
+ tool: 'vision_describe',
871
+ success: true,
872
+ summary: `Image (${Math.round(buf.length / 1024)} KB) — "${(userPrompt).slice(0, 60)}"`,
873
+ });
874
+ const personaName = this.config.responder?.telegram?.botName || this.config.responder?.botName || '';
875
+ const personaMode = this.config.responder?.telegram?.personaMode || (personaName ? 'persona' : 'agent');
876
+ const prefix = personaMode === 'persona-only' && personaName ? ''
877
+ : personaName ? `[${personaName}]\n\n`
878
+ : `[HERALD]\n\n`;
879
+ await this._telegramCall('sendMessage', { chat_id: chatId, text: prefix + truncated });
880
+ this.log(`[Telegram] Image vision response to ${fromUser} (${buf.length} bytes, ${description.length} chars)`);
881
+ } catch (err) {
882
+ this.log(`[Telegram] Vision failed: ${err.message}`);
883
+ await this._telegramCall('sendMessage', { chat_id: chatId,
884
+ text: `Non riesco ad analizzare l'immagine: ${err.message}` }).catch(() => {});
885
+ }
886
+ return;
887
+ }
888
+
835
889
  // Handle voice notes — transcribe with Whisper (Groq or OpenAI)
836
890
  if (message.voice || message.audio) {
837
891
  const fileId = (message.voice || message.audio).file_id;
@@ -2189,6 +2243,50 @@ class TelegramResponder {
2189
2243
  }
2190
2244
  }
2191
2245
 
// ── Shared direct-action dispatcher (Telegram / Discord / Chat WebUI / Voice) ─
// Module-level entry point, so callers do not need a responder of their own.
// It lazily builds ONE TelegramResponder from a placeholder config and uses it
// purely as the host object for the `_tryDirectFresh*` methods. Every call
// supplies its own `auditKey` (chatId for Telegram, channelId for Discord,
// conversationId for Chat WebUI) so each platform keeps a separate action
// history.
let _sharedDirectHandler = null;

// Logger and audit key the shared handler carried right after construction.
// They are re-applied on every call that does not bring its own, so per-call
// settings from one platform never leak into the next caller.
let _sharedDirectDefaults = null;

// `_tryDirectFresh*` methods in dispatch order: specific fast paths first,
// the universal handler last.
const _DIRECT_ACTION_METHODS = Object.freeze([
  '_tryDirectFreshCalendarAction',
  '_tryDirectFreshEmailAction',
  '_tryDirectFreshTaskAction',
  '_tryDirectFreshNoteAction',
  '_tryDirectFreshReminderAction',
  '_tryDirectFreshSlackAction',
  '_tryDirectFreshUniversalAction',
]);

/**
 * Return the process-wide handler instance, creating it on first use.
 *
 * @returns {TelegramResponder} the shared host for the `_tryDirectFresh*` methods
 */
function _getDirectHandler() {
  if (!_sharedDirectHandler) {
    _sharedDirectHandler = new TelegramResponder(
      { responder: { telegram: { token: '__noop__' } } },
      () => {},
      () => {},
    );
    // Ensure the in-memory store exists.
    _sharedDirectHandler._lastContextByChatId = _sharedDirectHandler._lastContextByChatId || {};
    // Snapshot the pristine per-call fields before any caller overrides them.
    _sharedDirectDefaults = {
      log: _sharedDirectHandler.log,
      auditKey: _sharedDirectHandler._lastDirectAuditChatId,
    };
  }
  return _sharedDirectHandler;
}

/**
 * Try every direct-action handler in order (fast-path → universal). Returns
 * the first truthy handler result (`{action, success, message}`) on a hit,
 * or `null` if nothing claimed the message.
 *
 * @param {string} text — the raw user message in any language
 * @param {object} config — loaded nha config, forwarded unchanged to each handler
 * @param {object} [opts] — `null` is treated like an omitted argument
 * @param {string} [opts.auditKey] — stable key for action audit (chatId, channelId, conversationId…)
 * @param {(line:string)=>void} [opts.log] — optional logger
 * @returns {Promise<object|null>} the claiming handler's result, or `null`
 */
export async function tryDirectActionAll(text, config, opts = {}) {
  // A default parameter only covers `undefined`; guard an explicit `null` too.
  const { auditKey, log } = opts ?? {};
  const h = _getDirectHandler();

  // Always (re)assign the per-call fields. Assigning only when provided left
  // the previous caller's logger / audit key installed on the shared handler.
  h._lastDirectAuditChatId = auditKey || _sharedDirectDefaults.auditKey;
  h.log = log || _sharedDirectDefaults.log;

  // NOTE(review): this state still lives on the shared instance, so two calls
  // that overlap across the awaits below can observe each other's values.
  // Isolating that requires changes inside TelegramResponder — TODO confirm.
  for (const method of _DIRECT_ACTION_METHODS) {
    const outcome = await h[method](text, config);
    if (outcome) return outcome;
  }
  return null;
}
2289
+
2192
2290
  // ── Discord Bot (Gateway WebSocket via raw TLS, zero dependencies) ───────────
2193
2291
 
2194
2292
  class DiscordResponder {
@@ -2512,6 +2610,21 @@ class DiscordResponder {
2512
2610
  this.pendingRequests++;
2513
2611
 
2514
2612
  try {
2613
+ // Try the deterministic direct-action dispatcher BEFORE routing to an
2614
+ // LLM agent. Same architecture used by Telegram: LLM only for NLU,
2615
+ // tool execution always server-side, audit log per channel.
2616
+ const directFresh = await tryDirectActionAll(cleanText, this.config, {
2617
+ auditKey: `discord:${channelId}`,
2618
+ log: this.log,
2619
+ });
2620
+ if (directFresh) {
2621
+ await this._discordApiCall('POST', `/channels/${channelId}/messages`, {
2622
+ content: directFresh.message,
2623
+ });
2624
+ this.log(`[Discord] direct-action ${directFresh.action} → ${directFresh.success ? 'OK' : 'FAIL'}`);
2625
+ return;
2626
+ }
2627
+
2515
2628
  const agent = routeMessage(cleanText, this.autoRoute);
2516
2629
  this.log(`[Discord] ${fromUser} (#${channelId}): routed to ${agent.toUpperCase()}`);
2517
2630