npm - nothumanallowed - Versions diffs - 15.1.63 → 15.1.64 - Mend

nothumanallowed 15.1.63 → 15.1.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json +1 -1
package/src/constants.mjs +1 -1
package/src/server/routes/chat.mjs +27 -1
package/src/services/message-responder.mjs +114 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nothumanallowed",
-  "version": "15.1.63",
+  "version": "15.1.64",
   "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
   "type": "module",
   "bin": {

package/src/constants.mjs CHANGED Viewed

@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-export const VERSION = '15.1.63';
+export const VERSION = '15.1.64';
 export const BASE_URL = 'https://nothumanallowed.com/cli';
 export const API_BASE = 'https://nothumanallowed.com/api/v1';

package/src/server/routes/chat.mjs CHANGED Viewed

@@ -17,7 +17,7 @@ import {
 import { callLLMStream, callLLM, callLLMVision, parseAgentFile } from '../../services/llm.mjs';
 import { buildMemoryContext } from '../../services/memory.mjs';
 import { parseActions, executeTool, buildSystemPrompt, stripOrphanFences } from '../../services/tool-executor.mjs';
-import { detectLanguage } from '../../services/message-responder.mjs';
+import { detectLanguage, tryDirectActionAll } from '../../services/message-responder.mjs';
 // Migrate on import (once)
 migrateOldHistory();
@@ -267,6 +267,32 @@ export function register(router) {
     }, 3000);
     try {
+      // ── Deterministic direct-action dispatcher (LLM-NLU + server execute) ──
+      // Same architecture used by Telegram/Discord. Before invoking the chat
+      // LLM, classify the message: if it maps to a state-changing tool
+      // (calendar/email/task/file/drive/slack/notion/github/...), execute it
+      // deterministically server-side and stream the result. No more "the
+      // model said done but didn't call the tool".
+      const direct = await tryDirectActionAll(msg, config, {
+        auditKey: `chat:${body.conversationId || 'anon'}`,
+      });
+      if (direct) {
+        if (heartbeatInterval) { clearInterval(heartbeatInterval); heartbeatInterval = null; }
+        sse('tool', { action: direct.action, status: 'done', result: (direct.message || '').slice(0, 240) });
+        sse('token', { content: direct.message });
+        // Persist to conversation
+        if (body.conversationId) {
+          try {
+            const conv = loadConversation(body.conversationId);
+            if (conv) addMessages(conv, msg, direct.message);
+          } catch {}
+        }
+        sse('done', { content: direct.message });
+        res.write('data: [DONE]\n\n');
+        res.end();
+        return;
+      }
       let fullResponse = '';
       fullResponse = await callLLMStream(config, enrichedPrompt, userMessage, (chunk) => {
         clearInterval(heartbeatInterval);

package/src/services/message-responder.mjs CHANGED Viewed

@@ -829,9 +829,63 @@ class TelegramResponder {
     // Track this user for broadcast notifications (update alerts, etc.)
     touchTelegramUser(chatId, message.from?.username, message.from?.first_name);
-    let rawText = message.text || '';
+    let rawText = message.text || message.caption || '';
     let isVoice = false;
+    // ── Image / photo handler (vision via Liara or fallback provider) ──────
+    // Telegram sends `message.photo` as an array of size variants — we pick
+    // the largest. For documents (e.g. screenshots sent as files), we accept
+    // any mime starting with image/.
+    const photo = Array.isArray(message.photo) && message.photo.length
+      ? message.photo[message.photo.length - 1]
+      : null;
+    const isImageDoc = message.document && /^image\//.test(message.document.mime_type || '');
+    if (photo || isImageDoc) {
+      try {
+        await this._telegramCall('sendChatAction', { chat_id: chatId, action: 'typing' });
+        const fileId = photo ? photo.file_id : message.document.file_id;
+        const fileInfo = await this._telegramCall('getFile', { file_id: fileId });
+        const filePath = fileInfo?.result?.file_path;
+        if (!filePath) throw new Error('Telegram file_path missing');
+        const fileUrl = `https://api.telegram.org/file/bot${this.token}/${filePath}`;
+        const fileRes = await fetch(fileUrl);
+        if (!fileRes.ok) throw new Error(`Telegram file fetch ${fileRes.status}`);
+        const buf = Buffer.from(await fileRes.arrayBuffer());
+        const base64 = buf.toString('base64');
+        // Infer mediaType from file_path extension.
+        const ext = (filePath.split('.').pop() || 'jpg').toLowerCase();
+        const mediaType = ext === 'png' ? 'image/png'
+                        : ext === 'gif' ? 'image/gif'
+                        : ext === 'webp' ? 'image/webp'
+                        : 'image/jpeg';
+        const userPrompt = rawText.trim()
+          || 'Describe this image in detail. If it contains text, transcribe it exactly. Reply in Italian.';
+        const langInstruction = detectLanguage(userPrompt) || (rawText ? null : null);
+        const sysPrompt = `You are a helpful visual assistant. ${langInstruction === 'English' ? 'Reply in English.' : 'Rispondi in italiano.'} Be specific and accurate. If asked to extract text, transcribe it verbatim. If asked to identify objects, list them clearly.`;
+        const { callLLMVision } = await import('./llm.mjs');
+        const description = await callLLMVision(this.config, sysPrompt, userPrompt, { base64, mediaType });
+        const truncated = description.length > 4000 ? description.slice(0, 3950) + '\n\n... [truncated]' : description;
+        // Audit
+        this._recordAudit(chatId, {
+          tool: 'vision_describe',
+          success: true,
+          summary: `Image (${Math.round(buf.length / 1024)} KB) — "${(userPrompt).slice(0, 60)}"`,
+        });
+        const personaName = this.config.responder?.telegram?.botName || this.config.responder?.botName || '';
+        const personaMode = this.config.responder?.telegram?.personaMode || (personaName ? 'persona' : 'agent');
+        const prefix = personaMode === 'persona-only' && personaName ? ''
+                     : personaName ? `[${personaName}]\n\n`
+                     : `[HERALD]\n\n`;
+        await this._telegramCall('sendMessage', { chat_id: chatId, text: prefix + truncated });
+        this.log(`[Telegram] Image vision response to ${fromUser} (${buf.length} bytes, ${description.length} chars)`);
+      } catch (err) {
+        this.log(`[Telegram] Vision failed: ${err.message}`);
+        await this._telegramCall('sendMessage', { chat_id: chatId,
+          text: `Non riesco ad analizzare l'immagine: ${err.message}` }).catch(() => {});
+      }
+      return;
+    }
     // Handle voice notes — transcribe with Whisper (Groq or OpenAI)
     if (message.voice || message.audio) {
       const fileId = (message.voice || message.audio).file_id;
@@ -2189,6 +2243,50 @@ class TelegramResponder {
   }
 }
+// ── Shared direct-action dispatcher (Telegram / Discord / Chat WebUI / Voice) ─
+// A reusable, instance-less handler. Internally piggybacks on a singleton
+// TelegramResponder built with a dummy config — we only use it as a host for
+// the `_tryDirectFresh*` methods. The audit log is keyed by the caller's
+// own `auditKey` (chatId for Telegram, channelId for Discord, conversationId
+// for Chat WebUI), so each platform keeps its own action history without
+// crossing wires.
+let _sharedDirectHandler = null;
+function _getDirectHandler() {
+  if (!_sharedDirectHandler) {
+    _sharedDirectHandler = new TelegramResponder(
+      { responder: { telegram: { token: '__noop__' } } },
+      () => {},
+      () => {},
+    );
+    // Ensure the in-memory store exists.
+    _sharedDirectHandler._lastContextByChatId = _sharedDirectHandler._lastContextByChatId || {};
+  }
+  return _sharedDirectHandler;
+}
+/**
+ * Try every direct-action handler in order (fast-path → universal). Returns
+ * `{action, success, message}` on hit, `null` if nothing claimed the message.
+ *
+ * @param {string} text       — the raw user message in any language
+ * @param {object} config     — loaded nha config (used by tools + LLM NLU)
+ * @param {object} [opts]
+ *   @param {string} [opts.auditKey]   — stable key for action audit (chatId, channelId, conversationId…)
+ *   @param {(line:string)=>void} [opts.log] — optional logger
+ */
+export async function tryDirectActionAll(text, config, opts = {}) {
+  const h = _getDirectHandler();
+  if (opts.auditKey) h._lastDirectAuditChatId = opts.auditKey;
+  if (opts.log) h.log = opts.log;
+  return await h._tryDirectFreshCalendarAction(text, config)
+      || await h._tryDirectFreshEmailAction(text, config)
+      || await h._tryDirectFreshTaskAction(text, config)
+      || await h._tryDirectFreshNoteAction(text, config)
+      || await h._tryDirectFreshReminderAction(text, config)
+      || await h._tryDirectFreshSlackAction(text, config)
+      || await h._tryDirectFreshUniversalAction(text, config);
+}
 // ── Discord Bot (Gateway WebSocket via raw TLS, zero dependencies) ───────────
 class DiscordResponder {
@@ -2512,6 +2610,21 @@ class DiscordResponder {
     this.pendingRequests++;
     try {
+      // Try the deterministic direct-action dispatcher BEFORE routing to an
+      // LLM agent. Same architecture used by Telegram: LLM only for NLU,
+      // tool execution always server-side, audit log per channel.
+      const directFresh = await tryDirectActionAll(cleanText, this.config, {
+        auditKey: `discord:${channelId}`,
+        log: this.log,
+      });
+      if (directFresh) {
+        await this._discordApiCall('POST', `/channels/${channelId}/messages`, {
+          content: directFresh.message,
+        });
+        this.log(`[Discord] direct-action ${directFresh.action} → ${directFresh.success ? 'OK' : 'FAIL'}`);
+        return;
+      }
       const agent = routeMessage(cleanText, this.autoRoute);
       this.log(`[Discord] ${fromUser} (#${channelId}): routed to ${agent.toUpperCase()}`);