npm - nothumanallowed - Versions diffs - 16.0.12 → 16.0.13 - Mend

nothumanallowed 16.0.12 → 16.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +1 -1
package/src/constants.mjs +1 -1
package/src/server/routes/chat.mjs +71 -35
package/src/server/routes/config.mjs +18 -0
package/src/services/llm.mjs +21 -0
package/src/services/message-responder.mjs +73 -3
package/src/services/user-memory.mjs +48 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nothumanallowed",
-  "version": "16.0.12",
+  "version": "16.0.13",
   "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
   "type": "module",
   "bin": {

package/src/constants.mjs CHANGED Viewed

@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-export const VERSION = '16.0.12';
+export const VERSION = '16.0.13';
 export const BASE_URL = 'https://nothumanallowed.com/cli';
 export const API_BASE = 'https://nothumanallowed.com/api/v1';

package/src/server/routes/chat.mjs CHANGED Viewed

@@ -229,43 +229,77 @@ export function register(router) {
       content: (h.content || '').replace(/!\[Screenshot\]\(data:image\/[^)]+\)/g, '[Screenshot taken]'),
     })).filter(m => m.content);
-    // ── Rolling summary (Fix 2) ──
-    // After a threshold of turns we generate (or reuse) a compact summary of
-    // everything OLDER than the recent window, persisted in the conversation
-    // object. The recent window stays as raw messages[]. This is the
-    // "memory like ChatGPT/Claude" pattern.
-    const RECENT = 12;
+    // ── Rolling summary (Fix 2) — TOKEN-based threshold ──
+    // Industry pattern (Claude context compaction, ChatGPT memory): trigger
+    // summary when the OLDER messages would consume more than a budget,
+    // measured in tokens (~chars/4). Provider-aware budget:
+    //   - anthropic / openai / gemini → 24k tokens raw before summary
+    //   - nha (Liara/Qwen 32B 32k ctx) → 8k tokens raw before summary
+    //   - others → 8k as safe default
+    // Plus a hard cap of MAX_RECENT_TURNS messages in the recent window so latency stays bounded.
+    const provider = config.llm?.provider || (config.llm?.apiKey ? 'anthropic' : 'nha');
+    const TOKEN_BUDGET_BY_PROVIDER = {
+      anthropic: 24000, openai: 24000, gemini: 24000,
+      nha: 8000, deepseek: 16000, grok: 16000, mistral: 16000, cohere: 8000,
+    };
+    const tokenBudget = TOKEN_BUDGET_BY_PROVIDER[provider] || 8000;
+    const MAX_RECENT_TURNS = 30;     // hard cap (latency safeguard)
+    const approxTokens = (s) => Math.ceil((s || '').length / 4);
     let conversationSummary = '';
     let recentHistory = rawHistory;
-    if (rawHistory.length > RECENT) {
-      recentHistory = rawHistory.slice(-RECENT);
-      const older = rawHistory.slice(0, -RECENT);
-      // Try to reuse a cached summary from the conversation, regenerate if
-      // the older slice grew beyond what the cached summary covered.
-      let cachedConv = null;
-      if (body.conversationId) {
-        try { cachedConv = loadConversation(body.conversationId); } catch {}
+    if (rawHistory.length > 0) {
+      // Walk backwards accumulating tokens until we exceed the budget OR
+      // hit MAX_RECENT_TURNS. Everything BEFORE that index goes into summary.
+      let recentTokens = 0;
+      let splitIdx = 0;
+      for (let i = rawHistory.length - 1; i >= 0; i--) {
+        const t = approxTokens(rawHistory[i].content);
+        if (recentTokens + t > tokenBudget) { splitIdx = i + 1; break; }
+        if (rawHistory.length - i > MAX_RECENT_TURNS) { splitIdx = i + 1; break; }
+        recentTokens += t;
+        splitIdx = i;
       }
-      const cached = cachedConv?.rollingSummary;
-      if (cached && cached.coveredTurns === older.length) {
-        conversationSummary = cached.text;
-      } else {
-        // Generate a fresh summary via the same LLM. Compact, factual.
-        const summaryInput = older.map(m =>
-          `${m.role === 'user' ? 'Utente' : 'Assistente'}: ${m.content.slice(0, 600)}`
-        ).join('\n\n');
-        try {
-          conversationSummary = await callLLM(
-            config,
-            'Sei un sintetizzatore di conversazione. Riassumi in italiano in 200-400 token TUTTI i fatti, decisioni, preferenze utente, dati specifici (date, ID, nomi, numeri) emersi nella conversazione. Niente abbellimenti, solo informazione utile per ricostruire il contesto.',
-            summaryInput,
-            { max_tokens: 600, temperature: 0.2 },
-          );
-          if (cachedConv) {
-            cachedConv.rollingSummary = { text: conversationSummary, coveredTurns: older.length, at: new Date().toISOString() };
-            try { saveConversation(cachedConv); } catch {}
-          }
-        } catch { /* if summary fails, just skip it — recent history is enough */ }
+      recentHistory = rawHistory.slice(splitIdx);
+      const older = rawHistory.slice(0, splitIdx);
+      if (older.length > 0) {
+        // Reuse cached summary when the older slice hasn't grown.
+        let cachedConv = null;
+        if (body.conversationId) {
+          try { cachedConv = loadConversation(body.conversationId); } catch {}
+        }
+        const cached = cachedConv?.rollingSummary;
+        if (cached && cached.coveredTurns === older.length) {
+          conversationSummary = cached.text;
+        } else {
+          // Build the summary input with English role labels (the output language
+          // is requested separately via langLabel). Each turn is cut to 1200 chars (older context loses fine-grained details).
+          const summaryInput = older.map(m =>
+            `${m.role === 'user' ? 'User' : 'Assistant'}: ${m.content.slice(0, 1200)}`
+          ).join('\n\n');
+          const langLabel = userLang === 'it' ? 'in italiano' : `in ${userLang}`;
+          try {
+            conversationSummary = await callLLM(
+              config,
+              `You are a conversation summarizer. Summarize ${langLabel} in 200-500 tokens ALL facts, decisions, user preferences, specific data (dates, IDs, names, numbers, file paths, URLs) that emerged. No fluff, only information useful to reconstruct context. Preserve the language the user spoke in.`,
+              summaryInput,
+              { max_tokens: 700, temperature: 0.2 },
+            );
+            // Persist the fresh summary, fully replacing any previously cached
+            // one: it was just regenerated from the entire older slice, so
+            // nothing from the old summary needs to be merged in.
+            if (cachedConv) {
+              cachedConv.rollingSummary = {
+                text: conversationSummary,
+                coveredTurns: older.length,
+                coveredTokens: older.reduce((a, m) => a + approxTokens(m.content), 0),
+                at: new Date().toISOString(),
+              };
+              try { saveConversation(cachedConv); } catch {}
+            }
+          } catch { /* if summary fails, just skip it — recent history is enough */ }
+        }
       }
     }
@@ -278,9 +312,11 @@ export function register(router) {
     // ── User memory (Fix 3) — persistent across conversations + channels ──
     // Loaded from ~/.nha/user-memory.md, prepended to the system prompt.
     try {
-      const { buildMemoryPrefix } = await import('../../services/user-memory.mjs');
+      const { buildMemoryPrefix, autoLearnFromTurn } = await import('../../services/user-memory.mjs');
       const memPrefix = buildMemoryPrefix();
       if (memPrefix) effectiveSystemPrompt = `${memPrefix}${effectiveSystemPrompt || ''}`;
+      // Auto-learn — fire and forget, doesn't block the response.
+      autoLearnFromTurn(msg, config).catch(() => null);
     } catch {}
     // The final user message — keep the per-turn language tag close to the

package/src/server/routes/config.mjs CHANGED Viewed

@@ -32,6 +32,24 @@ export function register(router) {
     sendJSON(res, 200, { ok: true, version: VERSION, ts: Date.now() });
   });
+  // GET /api/audit/query — query the cross-channel audit log.
+  // Optional query params: tool, channel, since (ms timestamp), limit.
+  router.get('/api/audit/query', async (req, res) => {
+    try {
+      const { queryAuditLog } = await import('../../services/message-responder.mjs');
+      const url = new URL(req.url, 'http://localhost');
+      const entries = queryAuditLog({
+        tool: url.searchParams.get('tool') || undefined,
+        channel: url.searchParams.get('channel') || undefined,
+        since: url.searchParams.get('since') ? parseInt(url.searchParams.get('since'), 10) : undefined,
+        limit: parseInt(url.searchParams.get('limit') || '100', 10),
+      });
+      sendJSON(res, 200, { entries });
+    } catch (e) {
+      sendJSON(res, 500, { error: e.message });
+    }
+  });
   // GET /api/version/check
   //
   // Returns three version signals so the UI can distinguish three states:

package/src/services/llm.mjs CHANGED Viewed

@@ -495,12 +495,23 @@ export async function callGemini(apiKey, model, systemPrompt, userMessage, _stre
   return data.candidates?.[0]?.content?.parts?.[0]?.text || '';
 }
+/* OpenAI-compatible history mapper — spread into the `messages` array by
+ * callDeepSeek / callGrok / callMistral (callCohere builds its own
+ * `chat_history` array instead of using this helper).
+ * Returns [] when opts.history is not an array. Entries that are falsy or
+ * lack a truthy `role` / `content` are dropped; any role other than
+ * 'assistant' is sent as 'user'; content is coerced with String(). */
+function _openaiHistory(opts) {
+  return Array.isArray(opts?.history)
+    ? opts.history.filter(m => m && m.role && m.content).map(m => ({
+        role: m.role === 'assistant' ? 'assistant' : 'user',
+        content: String(m.content),
+      }))
+    : [];
+}
 export async function callDeepSeek(apiKey, model, systemPrompt, userMessage, stream = false, opts = {}) {
   const body = {
     model: model || 'deepseek-chat',
     max_tokens: opts.max_tokens || 8192,
     messages: [
       { role: 'system', content: systemPrompt },
+      ..._openaiHistory(opts),
       { role: 'user', content: userMessage },
     ],
     stream,
@@ -529,6 +540,7 @@ export async function callGrok(apiKey, model, systemPrompt, userMessage, stream
     max_tokens: opts.max_tokens || 8192,
     messages: [
       { role: 'system', content: systemPrompt },
+      ..._openaiHistory(opts),
       { role: 'user', content: userMessage },
     ],
     stream,
@@ -557,6 +569,7 @@ export async function callMistral(apiKey, model, systemPrompt, userMessage, stre
     max_tokens: opts.max_tokens || 8192,
     messages: [
       { role: 'system', content: systemPrompt },
+      ..._openaiHistory(opts),
       { role: 'user', content: userMessage },
     ],
     stream,
@@ -580,10 +593,18 @@ export async function callMistral(apiKey, model, systemPrompt, userMessage, stre
 }
 export async function callCohere(apiKey, model, systemPrompt, userMessage, _stream = false, opts = {}) {
+  // Cohere uses a 'chat_history' array with role: USER/CHATBOT (uppercase).
+  const cohereHistory = Array.isArray(opts.history)
+    ? opts.history.filter(m => m && m.role && m.content).map(m => ({
+        role: m.role === 'assistant' ? 'CHATBOT' : 'USER',
+        message: String(m.content),
+      }))
+    : [];
   const body = {
     model: model || 'command-r-plus',
     max_tokens: opts.max_tokens || 8192,
     preamble: systemPrompt,
+    chat_history: cohereHistory,
     message: userMessage,
   };
   if (opts.temperature !== undefined) body.temperature = opts.temperature;

package/src/services/message-responder.mjs CHANGED Viewed

@@ -23,12 +23,37 @@ import { VERSION } from '../constants.mjs';
 // (telegram, discord, chat web, AWF agents). Lets the user ask "what have you
 // done today?" from any surface and get a consistent answer.
 const _GLOBAL_AUDIT_FILE = path.join(os.homedir(), '.nha', 'audit-log.jsonl');
+const _AUDIT_MAX_LINES = 10000;          // rotate at 10k lines (~1MB JSONL)
+const _AUDIT_ARCHIVE_PREFIX = 'audit-log-';
+function _rotateAuditIfNeeded() { // best-effort rotation of the global audit log; any fs error is swallowed by the catch below
+  try {
+    if (!fs.existsSync(_GLOBAL_AUDIT_FILE)) return;
+    const stat = fs.statSync(_GLOBAL_AUDIT_FILE);
+    /* Cheap size gate: the file is only read and line-counted once it has
+     * reached 1.5 MB. Smaller files are never rotated here, whatever their
+     * line count. */
+    if (stat.size < 1_500_000) return;
+    const text = fs.readFileSync(_GLOBAL_AUDIT_FILE, 'utf-8');
+    const lines = text.split('\n').filter(Boolean); // empty lines are discarded
+    if (lines.length <= _AUDIT_MAX_LINES) return;
+    /* Keep the most recent _AUDIT_MAX_LINES lines in the live file and move
+     * EVERY older line (not just half of them) into a new archive file. */
+    const tail = lines.slice(-_AUDIT_MAX_LINES);
+    const archived = lines.slice(0, lines.length - _AUDIT_MAX_LINES);
+    const ts = new Date().toISOString().replace(/[:.]/g, '-'); // ':' and '.' become '-' — presumably to keep the file name filesystem-friendly
+    const archiveFile = path.join(path.dirname(_GLOBAL_AUDIT_FILE), `${_AUDIT_ARCHIVE_PREFIX}${ts}.jsonl`);
+    fs.writeFileSync(archiveFile, archived.join('\n') + '\n'); // archive is written first...
+    fs.writeFileSync(_GLOBAL_AUDIT_FILE, tail.join('\n') + '\n'); // ...then the live file is overwritten with the tail
+  } catch {}
+}
 function _appendGlobalAudit(entry) { // appends `entry` as one JSON line to the global audit file; failures are swallowed
   try {
     fs.mkdirSync(path.dirname(_GLOBAL_AUDIT_FILE), { recursive: true });
     fs.appendFileSync(_GLOBAL_AUDIT_FILE, JSON.stringify(entry) + '\n');
+    // Rotate occasionally: roughly 1 append in 100 runs the rotation check (a stat call; the file is only read once it reaches 1.5 MB).
+    if (Math.random() < 0.01) _rotateAuditIfNeeded();
   } catch {}
 }
 function _readGlobalAudit(limitTail = 100) {
   try {
     if (!fs.existsSync(_GLOBAL_AUDIT_FILE)) return [];
@@ -40,6 +65,20 @@ function _readGlobalAudit(limitTail = 100) {
   } catch { return []; }
 }
+/**
+ * Query the audit log with filters. Exported for the HTTP /api/audit/query
+ * endpoint. Supports filtering by tool, channel, since timestamp.
+ */
+export function queryAuditLog({ tool, channel, since, limit = 100 } = {}) {
+  const all = _readGlobalAudit(10000);
+  return all.filter(e => {
+    if (tool && e.tool !== tool) return false;
+    if (channel && e.channel !== channel) return false;
+    if (since && e.ts < since) return false;
+    return true;
+  }).slice(-limit);
+}
 // ── Agent Routing (keyword-based, zero LLM calls) ───────────────────────────
 const ROUTING_TABLE = [
@@ -1200,6 +1239,18 @@ class TelegramResponder {
       const auditNote = this._renderAuditForPrompt(chatId);
       if (auditNote) enrichedMessage = auditNote + enrichedMessage;
+      // ── User memory (Fix 3+D v16.0.13) — cross-channel persistent context.
+      // Same memory file that's used by the chat web UI. The user can
+      // `nha memory add "I prefer concise answers"` once and EVERY channel
+      // honors it.
+      try {
+        const { buildMemoryPrefix, autoLearnFromTurn } = await import('./user-memory.mjs');
+        const memPrefix = buildMemoryPrefix();
+        if (memPrefix) enrichedMessage = memPrefix + enrichedMessage;
+        // Auto-learn — fire and forget, doesn't block the response.
+        autoLearnFromTurn(cleanText, this.config).catch(() => null);
+      } catch {}
       if (TOOL_AGENTS.has(agent)) {
         const result = await callAgentWithTools(this.config, agent, enrichedMessage, detectedLang, preHistory);
         responseText = result.text;
@@ -1987,7 +2038,7 @@ class TelegramResponder {
         // Clear the pending state so we don't double-delete on next yes.
         delete this._lastContextByChatId[chatId].pendingDeleteEvents;
         delete this._lastContextByChatId[chatId].lastCalendarEvents;
-        try { (await import('./telegram-context.mjs')).saveTelegramContext(this._lastContextByChatId); } catch {}
+        try { saveTelegramContext(this._lastContextByChatId); } catch {}
         const subject = eligible.length === 1 ? `"${eligible[0].summary}"` : `${eligible.length} appuntamenti`;
         const lines = [`Ho cancellato ${subject}.`];
@@ -2194,7 +2245,7 @@ class TelegramResponder {
               lastCalendarListAt: Date.now(),
               lastCalendarSource: { tool: toolName, args },
             };
-            try { (await import('./telegram-context.mjs')).saveTelegramContext(this._lastContextByChatId); } catch {}
+            try { saveTelegramContext(this._lastContextByChatId); } catch {}
           }
           return { action: actionKey, success: true, message: String(out) };
         } catch (e) { return { action: actionKey, success: false, message: `Errore: ${e.message}` }; }
@@ -2786,7 +2837,26 @@ class DiscordResponder {
       // Tool-capable agents use the full tool execution loop
       const TOOL_AGENTS = new Set(['herald', 'hermes', 'edi', 'jarvis', 'flux', 'echo', 'mercury', 'pipe', 'navi', 'link', 'prometheus', 'tempest']);
       const callFn = TOOL_AGENTS.has(agent) ? callAgentWithTools : callAgent;
-      const response = await callFn(this.config, agent, cleanText);
+      // Cross-channel user memory + audit log + auto-learn (v16.0.13)
+      let discordMsg = cleanText;
+      try {
+        const { buildMemoryPrefix, autoLearnFromTurn } = await import('./user-memory.mjs');
+        const memPrefix = buildMemoryPrefix();
+        if (memPrefix) discordMsg = memPrefix + discordMsg;
+        autoLearnFromTurn(cleanText, this.config).catch(() => null);
+      } catch {}
+      try {
+        const auditNote = _readGlobalAudit(15);
+        if (auditNote.length > 0) {
+          const lines = auditNote.slice(-10).map(e => {
+            const t = new Date(e.ts).toLocaleString('it-IT', { day: '2-digit', month: 'short', hour: '2-digit', minute: '2-digit' });
+            const st = e.success === false ? '✗' : '✓';
+            return `- ${t} · ${e.tool} ${st} · ${e.summary || ''}`;
+          }).join('\n');
+          discordMsg = `[AZIONI RECENTI da altri canali]\n${lines}\n[FINE]\n\n${discordMsg}`;
+        }
+      } catch {}
+      const response = await callFn(this.config, agent, discordMsg);
       // Discord message limit is 2000 chars
       const truncated = response.length > 1900

package/src/services/user-memory.mjs CHANGED Viewed

@@ -78,3 +78,51 @@ export function buildMemoryPrefix() {
   }
   return `[USER MEMORY — persistent across all conversations]\n${raw}\n[END USER MEMORY]\n\n`;
 }
+/**
+ * Auto-extract memorable facts from a user turn and append them to memory.
+ * Mirrors ChatGPT's "Memory" auto-learn: scans the message for explicit
+ * "remember that..." / "ricorda che..." instructions AND for implicit
+ * personal facts (name, location, role, preferences, deadlines, contacts).
+ *
+ * Designed to be CHEAP: runs ONLY when the user message contains a likely
+ * signal ("ricord", "remember", "preferisco", "mi chiamo", "lavoro come",
+ * "ho un appuntamento", "uso sempre", etc.). Skips noise.
+ *
+ * @param {string} userText
+ * @param {object} config — NHA config (needs llm provider)
+ * @returns {Promise<string|null>} the new memory line if learned, else null
+ */
+export async function autoLearnFromTurn(userText, config) {
+  if (!userText || typeof userText !== 'string' || userText.length < 8) return null;
+  // Cheap pre-filter — only call the LLM if the text plausibly contains a fact.
+  const trigger = /\b(ricord[aiy]|memorizz[aiy]|salv[aiy]\s+che|tieni\s+a\s+mente|prefer(isco|isci)|mi\s+chiamo|sono\s+(un|una)\b|lavoro\s+(come|presso|in)\b|abito\s+(a|in)\b|vivo\s+(a|in)\b|uso\s+sempre|preferenza|impostazione|deadline|scadenza|ho\s+un\s+(appuntament|incontro)|il\s+mio\s+(nome|email|telefon|indirizz)|api\s+key|password|remember\s+that|please\s+remember|note\s+that|my\s+name\s+is|i\s+work\s+as|i\s+live\s+in|i\s+prefer|i\s+use\s+always)\b/i;
+  if (!trigger.test(userText)) return null;
+  try {
+    const { callLLM } = await import('./llm.mjs');
+    const systemPrompt =
+      'You are a memory extractor. Read the user message and decide if there is ONE durable fact, preference, or piece of personal context worth remembering across future conversations. ' +
+      'Return STRICT JSON: {"memorable": true|false, "fact": "concise fact in the user language, max 140 chars"} or {"memorable": false}. ' +
+      'Memorable: name, role, location, language preference, communication style, recurring contacts, long-term projects, API keys/IDs (only id, NOT secrets), tools they use, hard preferences. ' +
+      'NOT memorable: greetings, transient questions, one-off tasks, weather, news, anything that expires within a day. ' +
+      'NEVER fabricate facts that the user did not explicitly state.';
+    const raw = await callLLM(config, systemPrompt, userText, { max_tokens: 150, temperature: 0.1 });
+    const m = raw.match(/\{[\s\S]*\}/);
+    if (!m) return null;
+    const parsed = JSON.parse(m[0]);
+    if (!parsed.memorable || !parsed.fact || typeof parsed.fact !== 'string') return null;
+    const fact = parsed.fact.trim().slice(0, 140);
+    if (!fact) return null;
+    // Deduplicate: skip if a near-identical fact is already in memory.
+    const existing = loadUserMemory().toLowerCase();
+    const factLow = fact.toLowerCase();
+    // Very rough dedup: if the first 30 chars of the new fact appear in
+    // memory already, skip. Avoid LLM-driven dedup loop (would be expensive).
+    if (factLow.length > 20 && existing.includes(factLow.slice(0, Math.min(30, factLow.length)))) {
+      return null;
+    }
+    addUserMemory(`(auto) ${fact}`);
+    return fact;
+  } catch { return null; }
+}