npm - nothumanallowed - Versions diffs - 16.0.11 → 16.0.13 - Mend

nothumanallowed 16.0.11 → 16.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json +1 -1
package/src/cli.mjs +6 -0
package/src/commands/memory.mjs +70 -0
package/src/constants.mjs +1 -1
package/src/server/routes/chat.mjs +101 -22
package/src/server/routes/config.mjs +18 -0
package/src/services/llm.mjs +55 -2
package/src/services/message-responder.mjs +122 -8
package/src/services/user-memory.mjs +128 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nothumanallowed",
-  "version": "16.0.11",
+  "version": "16.0.13",
   "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
   "type": "module",
   "bin": {

package/src/cli.mjs CHANGED Viewed

@@ -18,6 +18,7 @@ import { cmdUI } from './commands/ui.mjs';
 import { cmdGoogle } from './commands/google-auth.mjs';
 import { cmdMicrosoft } from './commands/microsoft-auth.mjs';
 import { cmdScan } from './commands/scan.mjs';
+import { runMemory } from './commands/memory.mjs';
 import { cmdVoice } from './commands/voice.mjs';
 import { cmdPlugin, findPluginForCommand } from './commands/plugin.mjs';
 import { banner, info, ok, warn, fail, C, G, Y, D, W, BOLD, NC, M, B, R } from './ui.mjs';
@@ -86,6 +87,11 @@ export async function main(argv) {
     case 'task':
       return cmdTasks(args);
+    case 'memory':
+    case 'memorize':
+    case 'remember':
+      return runMemory(args);
     case 'ops':
       return cmdOps(args);

package/src/commands/memory.mjs ADDED Viewed

@@ -0,0 +1,70 @@
+/**
+ * `nha memory` — manage persistent user memory.
+ *
+ * Subcommands:
+ *   nha memory add "..."      append a fact
+ *   nha memory list           print current memory
+ *   nha memory edit           open the memory file in $EDITOR
+ *   nha memory clear          wipe everything (with confirmation)
+ *   nha memory path           print the memory file path
+ */
+import {
+  addUserMemory,
+  loadUserMemory,
+  clearUserMemory,
+  getMemoryPath,
+} from '../services/user-memory.mjs';
+import { spawn } from 'child_process';
+/**
+ * Dispatch a `nha memory` subcommand.
+ *
+ * `args[0]` selects the subcommand (`add`, `list`/`show`, `edit`, `clear`,
+ * `path`); with no subcommand the current memory is printed. Usage errors
+ * and unknown subcommands are reported on stderr and end the process with
+ * exit code 1.
+ *
+ * @param {string[]} args - Subcommand followed by its own arguments.
+ * @returns {Promise<void>}
+ */
+export async function runMemory(args) {
+  const sub = args[0];
+  if (!sub || sub === 'list' || sub === 'show') {
+    const text = loadUserMemory();
+    if (!text.trim()) {
+      console.log('Memory empty. Add something with: nha memory add "..."');
+    } else {
+      console.log(text);
+    }
+    return;
+  }
+  if (sub === 'add') {
+    const entry = args.slice(1).join(' ').trim();
+    if (!entry) {
+      console.error('Usage: nha memory add "Fact to remember"');
+      process.exit(1);
+    }
+    addUserMemory(entry);
+    console.log(`✓ Added to memory: ${entry}`);
+    return;
+  }
+  if (sub === 'edit') {
+    const editor = process.env.EDITOR || process.env.VISUAL || 'nano';
+    // Wait for the editor instead of returning immediately, and listen for
+    // 'error': without a listener, a missing editor binary (ENOENT) is
+    // raised as an unhandled 'error' event and crashes with a stack trace.
+    const exitCode = await new Promise((resolve) => {
+      const proc = spawn(editor, [getMemoryPath()], { stdio: 'inherit' });
+      proc.on('error', (err) => {
+        console.error(`Could not launch editor "${editor}": ${err.message}`);
+        resolve(1);
+      });
+      proc.on('close', (code) => resolve(code || 0));
+    });
+    // Same exit semantics as before: propagate the editor's exit code.
+    process.exit(exitCode);
+  }
+  if (sub === 'clear') {
+    // Destructive: require an explicit confirmation flag.
+    const confirm = args.includes('--yes') || args.includes('-y');
+    if (!confirm) {
+      console.error('This will wipe ALL stored memories. Re-run with --yes to confirm.');
+      process.exit(1);
+    }
+    clearUserMemory();
+    console.log('✓ Memory cleared.');
+    return;
+  }
+  if (sub === 'path') {
+    console.log(getMemoryPath());
+    return;
+  }
+  console.error(`Unknown subcommand: ${sub}`);
+  console.error('Usage: nha memory <add|list|edit|clear|path>');
+  process.exit(1);
+}

package/src/constants.mjs CHANGED Viewed

@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-export const VERSION = '16.0.11';
+export const VERSION = '16.0.13';
 export const BASE_URL = 'https://nothumanallowed.com/cli';
 export const API_BASE = 'https://nothumanallowed.com/api/v1';

package/src/server/routes/chat.mjs CHANGED Viewed

@@ -219,32 +219,111 @@ export function register(router) {
       enrichedPrompt += `\n\nIMPORTANT: Output language is ${userLang}. Do NOT switch languages mid-response.`;
     }
-    // Rolling context window
+    // ── Conversation history → structured messages[] (Fix 1, v16.0.12) ──
+    // ChatGPT/Claude pass full history as messages[]. We do the same: each
+    // turn keeps its own {role, content} so the model sees a real
+    // conversation, not a concatenated string. The rolling SUMMARY of older
+    // turns (Fix 2) is injected as a context prefix in the system prompt.
     const rawHistory = (body.history || []).map(h => ({
-      role: h.role,
+      role: h.role === 'assistant' ? 'assistant' : 'user',
       content: (h.content || '').replace(/!\[Screenshot\]\(data:image\/[^)]+\)/g, '[Screenshot taken]'),
-    }));
-    const RECENT = 6;
-    const parts = [];
-    if (rawHistory.length > RECENT) {
-      const older = rawHistory.slice(0, -RECENT);
-      const lines = [];
-      for (let i = 0; i < older.length; i += 2) {
-        const u = older[i]?.content?.slice(0, 150)?.replace(/\n/g, ' ') || '';
-        const a = older[i+1]?.content?.slice(0, 200)?.replace(/\n/g, ' ') || '';
-        if (u) lines.push(`- User: "${u.trim()}${u.length >= 150 ? '...' : ''}" → ${a.trim()}${a.length >= 200 ? '...' : ''}`);
+    })).filter(m => m.content);
+    // ── Rolling summary (Fix 2) — TOKEN-based threshold ──
+    // Industry pattern (Claude context compaction, ChatGPT memory): trigger
+    // summary when the OLDER messages would consume more than a budget,
+    // measured in tokens (~chars/4). Provider-aware budget:
+    //   - anthropic / openai / gemini → 24k tokens raw before summary
+    //   - nha (Liara/Qwen 32B 32k ctx) → 8k tokens raw before summary
+    //   - others → 8k as safe default
+    // Plus per-turn cap of MAX_RECENT turns so latency stays bounded.
+    const provider = config.llm?.provider || (config.llm?.apiKey ? 'anthropic' : 'nha');
+    const TOKEN_BUDGET_BY_PROVIDER = {
+      anthropic: 24000, openai: 24000, gemini: 24000,
+      nha: 8000, deepseek: 16000, grok: 16000, mistral: 16000, cohere: 8000,
+    };
+    const tokenBudget = TOKEN_BUDGET_BY_PROVIDER[provider] || 8000;
+    const MAX_RECENT_TURNS = 30;     // hard cap (latency safeguard)
+    const approxTokens = (s) => Math.ceil((s || '').length / 4);
+    let conversationSummary = '';
+    let recentHistory = rawHistory;
+    if (rawHistory.length > 0) {
+      // Walk backwards accumulating tokens until we exceed the budget OR
+      // hit MAX_RECENT_TURNS. Everything BEFORE that index goes into summary.
+      let recentTokens = 0;
+      let splitIdx = 0;
+      for (let i = rawHistory.length - 1; i >= 0; i--) {
+        const t = approxTokens(rawHistory[i].content);
+        if (recentTokens + t > tokenBudget) { splitIdx = i + 1; break; }
+        if (rawHistory.length - i > MAX_RECENT_TURNS) { splitIdx = i + 1; break; }
+        recentTokens += t;
+        splitIdx = i;
+      }
+      recentHistory = rawHistory.slice(splitIdx);
+      const older = rawHistory.slice(0, splitIdx);
+      if (older.length > 0) {
+        // Reuse cached summary when the older slice hasn't grown.
+        let cachedConv = null;
+        if (body.conversationId) {
+          try { cachedConv = loadConversation(body.conversationId); } catch {}
+        }
+        const cached = cachedConv?.rollingSummary;
+        if (cached && cached.coveredTurns === older.length) {
+          conversationSummary = cached.text;
+        } else {
+          // Build summary input in user language. Trim individual turns to
+          // 1200 chars each (older context loses fine-grained details).
+          const summaryInput = older.map(m =>
+            `${m.role === 'user' ? 'User' : 'Assistant'}: ${m.content.slice(0, 1200)}`
+          ).join('\n\n');
+          const langLabel = userLang === 'it' ? 'in italiano' : `in ${userLang}`;
+          try {
+            conversationSummary = await callLLM(
+              config,
+              `You are a conversation summarizer. Summarize ${langLabel} in 200-500 tokens ALL facts, decisions, user preferences, specific data (dates, IDs, names, numbers, file paths, URLs) that emerged. No fluff, only information useful to reconstruct context. Preserve the language the user spoke in.`,
+              summaryInput,
+              { max_tokens: 700, temperature: 0.2 },
+            );
+            // Meta-compress: if the previous cached summary exists AND together
+            // with new content the result would balloon, replace fully with the
+            // new compact one (we just generated it from full older slice).
+            if (cachedConv) {
+              cachedConv.rollingSummary = {
+                text: conversationSummary,
+                coveredTurns: older.length,
+                coveredTokens: older.reduce((a, m) => a + approxTokens(m.content), 0),
+                at: new Date().toISOString(),
+              };
+              try { saveConversation(cachedConv); } catch {}
+            }
+          } catch { /* if summary fails, just skip it — recent history is enough */ }
+        }
       }
-      if (lines.length) parts.push(`[CONVERSATION CONTEXT]\n${lines.join('\n')}\n[END CONTEXT]`);
     }
-    for (const t of rawHistory.slice(-RECENT)) {
-      parts.push(`${t.role === 'user' ? '[User]' : '[Assistant]'} ${t.content.slice(0, 2000)}`);
+    // Inject summary into the system prompt — it's the cheapest way for the
+    // model to see it AND it benefits from prompt caching on Anthropic.
+    if (conversationSummary) {
+      effectiveSystemPrompt = `${effectiveSystemPrompt || ''}\n\n[CONTESTO CONVERSAZIONE PRECEDENTE]\n${conversationSummary}\n[FINE CONTESTO]`;
     }
-    // Prefix the last user turn with an explicit per-message language tag.
-    // System prompts can lose effectiveness over long conversations; the
-    // per-turn tag is the closest hint to the model's first generated token
-    // and is the most reliable trigger for the right language.
-    parts.push(`[User · respond in ${userLang}] ${effectiveMsg}`);
-    const userMessage = parts.join('\n\n');
+    // ── User memory (Fix 3) — persistent across conversations + channels ──
+    // Loaded from ~/.nha/user-memory.md, prepended to the system prompt.
+    try {
+      const { buildMemoryPrefix, autoLearnFromTurn } = await import('../../services/user-memory.mjs');
+      const memPrefix = buildMemoryPrefix();
+      if (memPrefix) effectiveSystemPrompt = `${memPrefix}${effectiveSystemPrompt || ''}`;
+      // Auto-learn — fire and forget, doesn't block the response.
+      autoLearnFromTurn(msg, config).catch(() => null);
+    } catch {}
+    // The final user message — keep the per-turn language tag close to the
+    // model's first generated token.
+    const userMessage = `[User · respond in ${userLang}] ${effectiveMsg}`;
+    // History passed to the provider as proper messages[] (not concatenated).
+    const historyForLLM = recentHistory;
     // Attachments — handle non-streaming
     if (body.imageBase64 || body.pdfBase64 || body.fileContent) {
@@ -298,7 +377,7 @@ export function register(router) {
         clearInterval(heartbeatInterval);
         heartbeatInterval = null;
         sse('token', { content: chunk });
-      });
+      }, { history: historyForLLM });
       const { textParts, actions } = parseActions(fullResponse);
       const toolResults = [];

package/src/server/routes/config.mjs CHANGED Viewed

@@ -32,6 +32,24 @@ export function register(router) {
     sendJSON(res, 200, { ok: true, version: VERSION, ts: Date.now() });
   });
+  // GET /api/audit/query — query the cross-channel audit log.
+  // Optional query params: tool, channel, since (ms timestamp), limit.
+  router.get('/api/audit/query', async (req, res) => {
+    try {
+      const { queryAuditLog } = await import('../../services/message-responder.mjs');
+      const url = new URL(req.url, 'http://localhost');
+      const entries = queryAuditLog({
+        tool: url.searchParams.get('tool') || undefined,
+        channel: url.searchParams.get('channel') || undefined,
+        since: url.searchParams.get('since') ? parseInt(url.searchParams.get('since'), 10) : undefined,
+        limit: parseInt(url.searchParams.get('limit') || '100', 10),
+      });
+      sendJSON(res, 200, { entries });
+    } catch (e) {
+      sendJSON(res, 500, { error: e.message });
+    }
+  });
   // GET /api/version/check
   //
   // Returns three version signals so the UI can distinguish three states:

package/src/services/llm.mjs CHANGED Viewed

@@ -399,11 +399,18 @@ export async function callAnthropic(apiKey, model, systemPrompt, userMessage, st
   const systemBlocks = systemPrompt
     ? [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }]
     : [];
+  // Build conversation messages: optional history[] then the current turn.
+  // history must alternate role: user/assistant/user/... ending with assistant
+  // (or be empty). The current userMessage is appended as the final user turn.
+  const historyMsgs = Array.isArray(opts.history)
+    ? opts.history.filter(m => m && m.role && m.content).map(m => ({ role: m.role, content: String(m.content) }))
+    : [];
+  const messages = [...historyMsgs, { role: 'user', content: userMessage }];
   const body = {
     model: model || 'claude-sonnet-4-20250514',
     max_tokens: opts.max_tokens || 8192,
     system: systemBlocks,
-    messages: [{ role: 'user', content: userMessage }],
+    messages,
     stream,
   };
   if (opts.temperature !== undefined) body.temperature = opts.temperature;
@@ -427,11 +434,15 @@ export async function callAnthropic(apiKey, model, systemPrompt, userMessage, st
 }
 export async function callOpenAI(apiKey, model, systemPrompt, userMessage, stream = false, opts = {}) {
+  const historyMsgs = Array.isArray(opts.history)
+    ? opts.history.filter(m => m && m.role && m.content).map(m => ({ role: m.role, content: String(m.content) }))
+    : [];
   const body = {
     model: model || 'gpt-4o',
     max_tokens: opts.max_tokens || 8192,
     messages: [
       { role: 'system', content: systemPrompt },
+      ...historyMsgs,
       { role: 'user', content: userMessage },
     ],
     stream,
@@ -459,9 +470,16 @@ export async function callGemini(apiKey, model, systemPrompt, userMessage, _stre
   const url = `https://generativelanguage.googleapis.com/v1beta/models/${m}:generateContent?key=${apiKey}`;
   const generationConfig = { maxOutputTokens: opts.max_tokens || 8192 };
   if (opts.temperature !== undefined) generationConfig.temperature = opts.temperature;
+  // Gemini uses 'contents' with role 'user'/'model'. Convert history.
+  const historyContents = Array.isArray(opts.history)
+    ? opts.history.filter(m => m && m.role && m.content).map(m => ({
+        role: m.role === 'assistant' ? 'model' : 'user',
+        parts: [{ text: String(m.content) }],
+      }))
+    : [];
   const body = {
     system_instruction: { parts: [{ text: systemPrompt }] },
-    contents: [{ parts: [{ text: userMessage }] }],
+    contents: [...historyContents, { role: 'user', parts: [{ text: userMessage }] }],
     generationConfig,
   };
   const res = await fetch(url, {
@@ -477,12 +495,23 @@ export async function callGemini(apiKey, model, systemPrompt, userMessage, _stre
   return data.candidates?.[0]?.content?.parts?.[0]?.text || '';
 }
+/**
+ * Convert `opts.history` into OpenAI-style chat messages.
+ *
+ * Entries that are falsy or lack a truthy `role` / `content` are skipped.
+ * Every role other than 'assistant' is mapped to 'user', and content is
+ * coerced to a string. Spread into the request body by the DeepSeek, Grok
+ * and Mistral callers.
+ *
+ * @param {{history?: Array<{role: string, content: *}>}} [opts]
+ * @returns {Array<{role: 'assistant'|'user', content: string}>} Empty when
+ *   `opts.history` is not an array.
+ */
+function _openaiHistory(opts) {
+  const turns = opts?.history;
+  if (!Array.isArray(turns)) return [];
+  const messages = [];
+  for (const turn of turns) {
+    if (!turn || !turn.role || !turn.content) continue;
+    const role = turn.role === 'assistant' ? 'assistant' : 'user';
+    messages.push({ role, content: String(turn.content) });
+  }
+  return messages;
+}
 export async function callDeepSeek(apiKey, model, systemPrompt, userMessage, stream = false, opts = {}) {
   const body = {
     model: model || 'deepseek-chat',
     max_tokens: opts.max_tokens || 8192,
     messages: [
       { role: 'system', content: systemPrompt },
+      ..._openaiHistory(opts),
       { role: 'user', content: userMessage },
     ],
     stream,
@@ -511,6 +540,7 @@ export async function callGrok(apiKey, model, systemPrompt, userMessage, stream
     max_tokens: opts.max_tokens || 8192,
     messages: [
       { role: 'system', content: systemPrompt },
+      ..._openaiHistory(opts),
       { role: 'user', content: userMessage },
     ],
     stream,
@@ -539,6 +569,7 @@ export async function callMistral(apiKey, model, systemPrompt, userMessage, stre
     max_tokens: opts.max_tokens || 8192,
     messages: [
       { role: 'system', content: systemPrompt },
+      ..._openaiHistory(opts),
       { role: 'user', content: userMessage },
     ],
     stream,
@@ -562,10 +593,18 @@ export async function callMistral(apiKey, model, systemPrompt, userMessage, stre
 }
 export async function callCohere(apiKey, model, systemPrompt, userMessage, _stream = false, opts = {}) {
+  // Cohere uses a 'chat_history' array with role: USER/CHATBOT (uppercase).
+  const cohereHistory = Array.isArray(opts.history)
+    ? opts.history.filter(m => m && m.role && m.content).map(m => ({
+        role: m.role === 'assistant' ? 'CHATBOT' : 'USER',
+        message: String(m.content),
+      }))
+    : [];
   const body = {
     model: model || 'command-r-plus',
     max_tokens: opts.max_tokens || 8192,
     preamble: systemPrompt,
+    chat_history: cohereHistory,
     message: userMessage,
   };
   if (opts.temperature !== undefined) body.temperature = opts.temperature;
@@ -664,11 +703,18 @@ export async function callNHA(apiKey, model, systemPrompt, userMessage, stream =
     .replace(/\|\|\(/g, '||(')                  // LDAP (cosmetic, non-breaking)
     .replace(/\)\|\|/g, ')||');                 // LDAP
+  const historyMsgs = Array.isArray(opts.history)
+    ? opts.history.filter(m => m && m.role && m.content).map(m => ({
+        role: m.role === 'assistant' ? 'assistant' : 'user',
+        content: sanitizeForSentinel(String(m.content)),
+      }))
+    : [];
   const body = {
     model: model || '/opt/models/qwen3-32b',
     max_tokens: opts.max_tokens || (thinkingEnabled ? 16384 : 8192),
     messages: [
       { role: 'system', content: sanitizeForSentinel(systemPrompt) },
+      ...historyMsgs,
       { role: 'user', content: sanitizeForSentinel(userMessage) },
     ],
     stream,
@@ -1212,11 +1258,18 @@ export async function callLLMStream(config, systemPrompt, userMessage, onToken,
     // 3. Otherwise default to 8192 (full context for specialist agents)
     const effectiveMaxTokens = opts.max_tokens || (thinkingEnabled ? 8192 : 8192);
+    const nhaHistory = Array.isArray(opts.history)
+      ? opts.history.filter(m => m && m.role && m.content).map(m => ({
+          role: m.role === 'assistant' ? 'assistant' : 'user',
+          content: sanitize(String(m.content)),
+        }))
+      : [];
     const nhaBody = {
       model: model || '/opt/models/qwen3-32b',
       max_tokens: effectiveMaxTokens,
       messages: [
         { role: 'system', content: sanitize(systemPrompt) },
+        ...nhaHistory,
         { role: 'user', content: sanitize(userMessage) },
       ],
       stream: false,

package/src/services/message-responder.mjs CHANGED Viewed

@@ -18,6 +18,67 @@ import path from 'path';
 import os from 'os';
 import { VERSION } from '../constants.mjs';
+// ── Global audit log helpers (Fix 4 v16.0.12) ──
+// Append-only JSONL at ~/.nha/audit-log.jsonl, shared across every channel
+// (telegram, discord, chat web, AWF agents). Lets the user ask "what have you
+// done today?" from any surface and get a consistent answer.
+const _GLOBAL_AUDIT_FILE = path.join(os.homedir(), '.nha', 'audit-log.jsonl');
+const _AUDIT_MAX_LINES = 10000;          // rotate at 10k lines (~1MB JSONL)
+const _AUDIT_ARCHIVE_PREFIX = 'audit-log-';
+/**
+ * Trim the global audit log once it has grown large.
+ *
+ * Does nothing unless the file exists, is at least 1,500,000 bytes, and
+ * holds more than _AUDIT_MAX_LINES non-empty lines. Otherwise the overflow
+ * (oldest lines) is written to a timestamped archive file in the same
+ * directory and the log is rewritten with the newest _AUDIT_MAX_LINES lines.
+ * Best-effort: every error is swallowed.
+ */
+function _rotateAuditIfNeeded() {
+  try {
+    // Cheap size gate first, so the file is only read when it is big.
+    const isLarge = fs.existsSync(_GLOBAL_AUDIT_FILE)
+      && fs.statSync(_GLOBAL_AUDIT_FILE).size >= 1_500_000;
+    if (!isLarge) return;
+    const entries = fs.readFileSync(_GLOBAL_AUDIT_FILE, 'utf-8').split('\n').filter(Boolean);
+    const overflow = entries.length - _AUDIT_MAX_LINES;
+    if (overflow <= 0) return;
+    const stamp = new Date().toISOString().replace(/[:.]/g, '-');
+    const archivePath = path.join(
+      path.dirname(_GLOBAL_AUDIT_FILE),
+      `${_AUDIT_ARCHIVE_PREFIX}${stamp}.jsonl`,
+    );
+    // Archive first, then shrink the live log.
+    fs.writeFileSync(archivePath, entries.slice(0, overflow).join('\n') + '\n');
+    fs.writeFileSync(_GLOBAL_AUDIT_FILE, entries.slice(overflow).join('\n') + '\n');
+  } catch {}
+}
+/**
+ * Append one entry to the global audit log as a single JSON line, creating
+ * the parent directory if needed. Roughly 1 in 100 calls also runs the
+ * rotation check. Best-effort: every error is swallowed.
+ *
+ * @param {object} entry - Value serialized with JSON.stringify.
+ */
+function _appendGlobalAudit(entry) {
+  try {
+    const logDir = path.dirname(_GLOBAL_AUDIT_FILE);
+    fs.mkdirSync(logDir, { recursive: true });
+    const record = JSON.stringify(entry) + '\n';
+    fs.appendFileSync(_GLOBAL_AUDIT_FILE, record);
+    // Sampled so the rotation check is not paid on every append.
+    const checkRotation = Math.random() < 0.01;
+    if (checkRotation) _rotateAuditIfNeeded();
+  } catch {}
+}
+/**
+ * Read and parse the last `limitTail` non-empty lines of the global audit
+ * log. Lines that are not valid JSON (or parse to a falsy value) are
+ * dropped, so fewer than `limitTail` entries may come back. A missing file
+ * or any I/O error yields an empty array.
+ *
+ * @param {number} [limitTail=100] - Maximum number of trailing lines to read.
+ * @returns {object[]} Parsed entries, oldest first.
+ */
+function _readGlobalAudit(limitTail = 100) {
+  // slice(-0) and slice(-NaN) both mean slice(0), i.e. the WHOLE log, so a
+  // non-positive or non-numeric limit must short-circuit to "nothing".
+  const wanted = Math.floor(Number(limitTail));
+  if (!(wanted > 0)) return [];
+  try {
+    if (!fs.existsSync(_GLOBAL_AUDIT_FILE)) return [];
+    const text = fs.readFileSync(_GLOBAL_AUDIT_FILE, 'utf-8');
+    const lines = text.split('\n').filter(Boolean);
+    return lines.slice(-wanted)
+      .map(l => { try { return JSON.parse(l); } catch { return null; } })
+      .filter(Boolean);
+  } catch { return []; }
+}
+/**
+ * Query the global audit log with optional filters.
+ *
+ * Reads up to the last 10000 entries, keeps those matching every supplied
+ * filter, and returns the newest `limit` of them (oldest first).
+ *
+ * @param {object} [filters]
+ * @param {string} [filters.tool] - Keep only entries whose `tool` equals this.
+ * @param {string} [filters.channel] - Keep only entries whose `channel` equals this.
+ * @param {number} [filters.since] - Keep only entries with `ts` >= this value.
+ * @param {number} [filters.limit=100] - Maximum entries to return. A value
+ *   that is not a number falls back to 100; zero or negative returns [].
+ * @returns {object[]}
+ */
+export function queryAuditLog({ tool, channel, since, limit = 100 } = {}) {
+  // `limit` can arrive as NaN/0/negative (e.g. parsed from a query string).
+  // slice(-0) / slice(-NaN) would return everything and a negative value
+  // would drop the OLDEST matches, so normalize before slicing.
+  const requested = Math.floor(Number(limit));
+  const cap = Number.isNaN(requested) ? 100 : requested;
+  if (cap <= 0) return [];
+  const all = _readGlobalAudit(10000);
+  return all.filter(e => {
+    if (tool && e.tool !== tool) return false;
+    if (channel && e.channel !== channel) return false;
+    if (since && e.ts < since) return false;
+    return true;
+  }).slice(-cap);
+}
 // ── Agent Routing (keyword-based, zero LLM calls) ───────────────────────────
 const ROUTING_TABLE = [
@@ -1178,6 +1239,18 @@ class TelegramResponder {
       const auditNote = this._renderAuditForPrompt(chatId);
       if (auditNote) enrichedMessage = auditNote + enrichedMessage;
+      // ── User memory (Fix 3+D v16.0.13) — cross-channel persistent context.
+      // Same memory file that's used by the chat web UI. The user can
+      // `nha memory add "I prefer concise answers"` once and EVERY channel
+      // honors it.
+      try {
+        const { buildMemoryPrefix, autoLearnFromTurn } = await import('./user-memory.mjs');
+        const memPrefix = buildMemoryPrefix();
+        if (memPrefix) enrichedMessage = memPrefix + enrichedMessage;
+        // Auto-learn — fire and forget, doesn't block the response.
+        autoLearnFromTurn(cleanText, this.config).catch(() => null);
+      } catch {}
       if (TOOL_AGENTS.has(agent)) {
         const result = await callAgentWithTools(this.config, agent, enrichedMessage, detectedLang, preHistory);
         responseText = result.text;
@@ -1449,21 +1522,43 @@ class TelegramResponder {
   _recordAudit(chatId, entry) {
     const ctx = this._lastContextByChatId[chatId] || (this._lastContextByChatId[chatId] = {});
     if (!Array.isArray(ctx.auditLog)) ctx.auditLog = [];
-    ctx.auditLog.push({ ts: Date.now(), ...entry });
+    const enriched = { ts: Date.now(), channel: chatId, ...entry };
+    ctx.auditLog.push(enriched);
     if (ctx.auditLog.length > 50) ctx.auditLog = ctx.auditLog.slice(-50);
     this._persistContext();
+    // ── Global audit log (Fix 4 v16.0.12) ──
+    // Append-only JSONL at ~/.nha/audit-log.jsonl shared across every channel
+    // (telegram / discord / chat web / AWF agent). Lets the user ask
+    // "what have you done today?" from any surface and get the same answer.
+    try {
+      _appendGlobalAudit(enriched);
+    } catch {}
   }
   _renderAuditForPrompt(chatId, maxEntries = 10) {
+    // Pull from BOTH the per-channel context AND the global log so the model
+    // sees actions made via a different channel too.
     const ctx = this._lastContextByChatId[chatId];
-    if (!ctx || !Array.isArray(ctx.auditLog) || ctx.auditLog.length === 0) return '';
-    const recent = ctx.auditLog.slice(-maxEntries);
+    const local = ctx?.auditLog || [];
+    let globalEntries = [];
+    try { globalEntries = _readGlobalAudit(100); } catch {}
+    // Merge + de-dupe by (ts, tool, summary), keep most recent.
+    const seen = new Set();
+    const merged = [...local, ...globalEntries].sort((a, b) => a.ts - b.ts).filter(e => {
+      const k = `${e.ts}|${e.tool}|${e.summary || ''}`;
+      if (seen.has(k)) return false;
+      seen.add(k);
+      return true;
+    });
+    if (merged.length === 0) return '';
+    const recent = merged.slice(-maxEntries);
     const lines = recent.map(e => {
       const time = new Date(e.ts).toLocaleString('it-IT', { day: '2-digit', month: 'short', hour: '2-digit', minute: '2-digit' });
       const status = e.success === false ? '✗ FALLITA' : '✓ OK';
-      return `- ${time} · ${e.tool} · ${status} · ${e.summary || ''}`;
+      const chan = e.channel && e.channel !== chatId ? ` [via ${String(e.channel).slice(0, 20)}]` : '';
+      return `- ${time} · ${e.tool} · ${status} · ${e.summary || ''}${chan}`;
     });
-    return `\n\n[AZIONI RECENTI ESEGUITE IN QUESTA CONVERSAZIONE — fonte di verità sui fatti già accaduti]\n${lines.join('\n')}\n[FINE AZIONI RECENTI]\n`;
+    return `\n\n[AZIONI RECENTI ESEGUITE — fonte di verità sui fatti già accaduti su QUALSIASI canale (Chat, Telegram, Discord, AWF)]\n${lines.join('\n')}\n[FINE AZIONI RECENTI]\n`;
   }
   _formatDateIT(isoDate) {
@@ -1943,7 +2038,7 @@ class TelegramResponder {
         // Clear the pending state so we don't double-delete on next yes.
         delete this._lastContextByChatId[chatId].pendingDeleteEvents;
         delete this._lastContextByChatId[chatId].lastCalendarEvents;
-        try { (await import('./telegram-context.mjs')).saveTelegramContext(this._lastContextByChatId); } catch {}
+        try { saveTelegramContext(this._lastContextByChatId); } catch {}
         const subject = eligible.length === 1 ? `"${eligible[0].summary}"` : `${eligible.length} appuntamenti`;
         const lines = [`Ho cancellato ${subject}.`];
@@ -2150,7 +2245,7 @@ class TelegramResponder {
               lastCalendarListAt: Date.now(),
               lastCalendarSource: { tool: toolName, args },
             };
-            try { (await import('./telegram-context.mjs')).saveTelegramContext(this._lastContextByChatId); } catch {}
+            try { saveTelegramContext(this._lastContextByChatId); } catch {}
           }
           return { action: actionKey, success: true, message: String(out) };
         } catch (e) { return { action: actionKey, success: false, message: `Errore: ${e.message}` }; }
@@ -2742,7 +2837,26 @@ class DiscordResponder {
       // Tool-capable agents use the full tool execution loop
       const TOOL_AGENTS = new Set(['herald', 'hermes', 'edi', 'jarvis', 'flux', 'echo', 'mercury', 'pipe', 'navi', 'link', 'prometheus', 'tempest']);
       const callFn = TOOL_AGENTS.has(agent) ? callAgentWithTools : callAgent;
-      const response = await callFn(this.config, agent, cleanText);
+      // Cross-channel user memory + audit log + auto-learn (v16.0.13)
+      let discordMsg = cleanText;
+      try {
+        const { buildMemoryPrefix, autoLearnFromTurn } = await import('./user-memory.mjs');
+        const memPrefix = buildMemoryPrefix();
+        if (memPrefix) discordMsg = memPrefix + discordMsg;
+        autoLearnFromTurn(cleanText, this.config).catch(() => null);
+      } catch {}
+      try {
+        const auditNote = _readGlobalAudit(15);
+        if (auditNote.length > 0) {
+          const lines = auditNote.slice(-10).map(e => {
+            const t = new Date(e.ts).toLocaleString('it-IT', { day: '2-digit', month: 'short', hour: '2-digit', minute: '2-digit' });
+            const st = e.success === false ? '✗' : '✓';
+            return `- ${t} · ${e.tool} ${st} · ${e.summary || ''}`;
+          }).join('\n');
+          discordMsg = `[AZIONI RECENTI da altri canali]\n${lines}\n[FINE]\n\n${discordMsg}`;
+        }
+      } catch {}
+      const response = await callFn(this.config, agent, discordMsg);
       // Discord message limit is 2000 chars
       const truncated = response.length > 1900

package/src/services/user-memory.mjs ADDED Viewed

@@ -0,0 +1,128 @@
+/**
+ * User memory — persistent across conversations and channels.
+ *
+ * Same idea as ChatGPT's "Memory" feature: a small Markdown file at
+ * ~/.nha/user-memory.md is loaded and prepended to the system prompt of
+ * every chat / Telegram / Discord / AWF agent call.
+ *
+ * The file is fully owned by the user — no telemetry, never uploaded.
+ * Stored as plain Markdown so it stays human-readable and editable.
+ */
+import fs from 'fs';
+import path from 'path';
+import { NHA_DIR } from '../constants.mjs';
+const MEMORY_FILE = path.join(NHA_DIR, 'user-memory.md');
+const MAX_MEMORY_SIZE = 8000; // chars — prevents prompt explosion
+/**
+ * Make sure the NHA directory and the memory file exist. A newly created
+ * memory file is seeded with a short Markdown header; an existing file is
+ * left untouched.
+ */
+function ensureFile() {
+  const dirMissing = !fs.existsSync(NHA_DIR);
+  if (dirMissing) fs.mkdirSync(NHA_DIR, { recursive: true });
+  if (fs.existsSync(MEMORY_FILE)) return;
+  const title = `# User Memory\n\n`;
+  const intro = `Things NHA should remember about you, across all conversations and channels.\n`;
+  const hint = `Edit this file freely, or use \`nha memory add "..."\` to append.\n\n`;
+  fs.writeFileSync(MEMORY_FILE, title + intro + hint);
+}
+/**
+ * Load the memory file, capped at MAX_MEMORY_SIZE characters.
+ *
+ * When the file is larger than the cap, the most recent content (the tail)
+ * is kept and the cut is moved forward to the next line start, so the
+ * result never begins with the back half of an entry.
+ *
+ * @returns {string} File content, or '' when the file is missing or cannot
+ *   be read.
+ */
+export function loadUserMemory() {
+  try {
+    if (!fs.existsSync(MEMORY_FILE)) return '';
+    const text = fs.readFileSync(MEMORY_FILE, 'utf-8');
+    if (text.length <= MAX_MEMORY_SIZE) return text;
+    // Keep the most recent entries (tail) if it grows too large.
+    const start = text.length - MAX_MEMORY_SIZE;
+    // Already on a line boundary: the raw tail is clean.
+    if (text[start - 1] === '\n') return text.slice(start);
+    const nextBreak = text.indexOf('\n', start);
+    // No later line to skip to (one oversized line): fall back to the raw
+    // tail rather than returning nothing.
+    if (nextBreak === -1 || nextBreak === text.length - 1) return text.slice(start);
+    // Drop the truncated first line so the prompt only sees whole entries.
+    return text.slice(nextBreak + 1);
+  } catch { return ''; }
+}
+/**
+ * Append a single fact/preference to the memory file as one dated bullet
+ * line (`- [YYYY-MM-DD] text`).
+ *
+ * Line breaks inside `entry` are collapsed to single spaces so one call
+ * always produces exactly one line, and a newline is inserted first when
+ * the existing file does not end with one (e.g. after a manual edit).
+ *
+ * @param {string} entry - Text to remember.
+ * @returns {boolean} true when a line was appended; false when `entry` is
+ *   not a string or is blank (in which case nothing is created or written).
+ */
+export function addUserMemory(entry) {
+  if (!entry || typeof entry !== 'string') return false;
+  // An embedded newline would otherwise let one entry spill into extra
+  // lines (or fake headings) in the Markdown file.
+  const trimmed = entry.replace(/\s*[\r\n]+\s*/g, ' ').trim();
+  if (!trimmed) return false;
+  // Validate before touching the disk so rejected input has no side effect.
+  ensureFile();
+  const timestamp = new Date().toISOString().slice(0, 10);
+  const current = fs.readFileSync(MEMORY_FILE, 'utf-8');
+  // Avoid gluing the new bullet onto an unterminated last line.
+  const separator = current.length > 0 && !current.endsWith('\n') ? '\n' : '';
+  const line = `${separator}- [${timestamp}] ${trimmed}\n`;
+  fs.appendFileSync(MEMORY_FILE, line);
+  return true;
+}
+/**
+ * Replace the entire memory file content with `text`.
+ *
+ * ensureFile() runs first, then the file is overwritten in full.
+ * NOTE(review): intended for whole-file replacement (e.g. an edit flow);
+ * no caller is visible here — verify before relying on a specific one.
+ *
+ * @param {string} text - New content; passed straight to fs.writeFileSync.
+ */
+export function setUserMemory(text) {
+  ensureFile();
+  fs.writeFileSync(MEMORY_FILE, text);
+}
+/**
+ * Wipe all memories by deleting the memory file. A file that does not
+ * exist is left alone (no error).
+ */
+export function clearUserMemory() {
+  const present = fs.existsSync(MEMORY_FILE);
+  if (!present) return;
+  fs.unlinkSync(MEMORY_FILE);
+}
+/**
+ * Get the memory file path (for the `nha memory edit` command to open and
+ * for `nha memory path` to print).
+ *
+ * @returns {string} The MEMORY_FILE path; the file itself may not exist yet.
+ */
+export function getMemoryPath() {
+  return MEMORY_FILE;
+}
+/**
+ * Build a system-prompt prefix block for the user memory.
+ *
+ * Returns an empty string when there is nothing to inject: no file, an
+ * empty file, or only boilerplate (Markdown headings plus the two intro
+ * lines the seeded header contains). Otherwise the trimmed memory is
+ * wrapped in delimiter markers so it does not bleed into the rest of the
+ * prompt.
+ *
+ * @returns {string} Delimited memory block ending in a blank line, or ''.
+ */
+export function buildMemoryPrefix() {
+  const raw = loadUserMemory().trim();
+  if (!raw) return '';
+  // Strip every line of the seeded header. The "Edit this file freely"
+  // line must be removed too, otherwise a header-only file still counts
+  // as content and gets injected into every prompt.
+  const withoutBoilerplate = raw
+    .replace(/^#.*$/gm, '')
+    .replace(/^Things NHA.*$/gm, '')
+    .replace(/^Edit this file freely.*$/gm, '')
+    .trim();
+  if (withoutBoilerplate.length === 0) return '';
+  return `[USER MEMORY — persistent across all conversations]\n${raw}\n[END USER MEMORY]\n\n`;
+}
+/**
+ * Auto-extract a memorable fact from a user turn and append it to memory.
+ *
+ * Flow: a cheap regex pre-filter looks for explicit "remember that..." /
+ * "ricorda che..." instructions or likely personal facts (name, location,
+ * role, preferences, deadlines, contacts). Only when it matches is the LLM
+ * asked for a strict-JSON verdict; a fact it marks memorable is trimmed to
+ * 140 chars, de-duplicated against existing memory, and stored with an
+ * "(auto) " prefix.
+ *
+ * Never throws: any failure (LLM error, invalid JSON, I/O) resolves to null.
+ *
+ * NOTE(review): the pre-filter also fires on "api key" / "password". The
+ * memory file is plain text and only the LLM instruction ("NOT secrets")
+ * stands between such a message and the file — consider a hard filter.
+ *
+ * @param {string} userText
+ * @param {object} config — NHA config, passed through to callLLM
+ * @returns {Promise<string|null>} the learned fact, else null
+ */
+export async function autoLearnFromTurn(userText, config) {
+  if (!userText || typeof userText !== 'string' || userText.length < 8) return null;
+  // Cheap pre-filter — only call the LLM if the text plausibly contains a fact.
+  // The stems appuntament / telefon / indirizz carry `\w*` because the
+  // trailing \b otherwise rejects the full words (appuntamento, telefono,
+  // indirizzo), so those signals could never fire.
+  const trigger = /\b(ricord[aiy]|memorizz[aiy]|salv[aiy]\s+che|tieni\s+a\s+mente|prefer(isco|isci)|mi\s+chiamo|sono\s+(un|una)\b|lavoro\s+(come|presso|in)\b|abito\s+(a|in)\b|vivo\s+(a|in)\b|uso\s+sempre|preferenza|impostazione|deadline|scadenza|ho\s+un\s+(appuntament\w*|incontro)|il\s+mio\s+(nome|email|telefon\w*|indirizz\w*)|api\s+key|password|remember\s+that|please\s+remember|note\s+that|my\s+name\s+is|i\s+work\s+as|i\s+live\s+in|i\s+prefer|i\s+use\s+always)\b/i;
+  if (!trigger.test(userText)) return null;
+  try {
+    const { callLLM } = await import('./llm.mjs');
+    const systemPrompt =
+      'You are a memory extractor. Read the user message and decide if there is ONE durable fact, preference, or piece of personal context worth remembering across future conversations. ' +
+      'Return STRICT JSON: {"memorable": true|false, "fact": "concise fact in the user language, max 140 chars"} or {"memorable": false}. ' +
+      'Memorable: name, role, location, language preference, communication style, recurring contacts, long-term projects, API keys/IDs (only id, NOT secrets), tools they use, hard preferences. ' +
+      'NOT memorable: greetings, transient questions, one-off tasks, weather, news, anything that expires within a day. ' +
+      'NEVER fabricate facts that the user did not explicitly state.';
+    const raw = await callLLM(config, systemPrompt, userText, { max_tokens: 150, temperature: 0.1 });
+    // Tolerate prose around the JSON object; a parse failure lands in catch.
+    const m = raw.match(/\{[\s\S]*\}/);
+    if (!m) return null;
+    const parsed = JSON.parse(m[0]);
+    if (!parsed.memorable || !parsed.fact || typeof parsed.fact !== 'string') return null;
+    const fact = parsed.fact.trim().slice(0, 140);
+    if (!fact) return null;
+    // Deduplicate: skip if a near-identical fact is already in memory.
+    const existing = loadUserMemory().toLowerCase();
+    const factLow = fact.toLowerCase();
+    // Long facts: rough prefix match (first 30 chars) anywhere in memory.
+    // Short facts (<= 20 chars) used to bypass dedup entirely and were
+    // re-added on every mention; they now require a stored line that ends
+    // with the whole fact, which avoids substring false positives.
+    // Avoid LLM-driven dedup loop (would be expensive).
+    const isDuplicate = factLow.length > 20
+      ? existing.includes(factLow.slice(0, 30))
+      : existing.split('\n').some(line => line.trim().endsWith(factLow));
+    if (isDuplicate) return null;
+    addUserMemory(`(auto) ${fact}`);
+    return fact;
+  } catch { return null; }
+}